# Source code for mlconjug3.mlconjug

"""
mlconjug3 Main module.

This module provides an easy-to-use interface for conjugating verbs using machine learning models.
It includes pre-trained models for French, English, Spanish, Italian, Portuguese and Romanian verbs,
as well as interfaces for training custom models and conjugating verbs in multiple languages.

The main class of the module is Conjugator, which provides the conjugate() method for conjugating verbs.
The class also manages the Verbiste data set and provides an interface with the scikit-learn pipeline.
The class can be initialized with a specific language and a custom model; otherwise, the default language is French
and the pre-trained French conjugation pipeline is used.

The module also includes helper classes for managing verb data, such as VerbInfo and Verb, as well as utility
functions for feature extraction and evaluation.
"""

from .PyVerbiste import Verbiste

from .conjug_manager import ConjugManager

from .constants import *

from .verbs import *

from .feature_extractor import extract_verb_features

from .dataset import DataSet

from .models import Model

from .utils import logger

from functools import lru_cache
from concurrent.futures import ProcessPoolExecutor
from random import Random
from collections import defaultdict
import joblib
import pkg_resources
import re
from zipfile import ZipFile


# Maps a language code to the class used to build the conjugated verb object
# for that language.
VERBS = dict(
    fr=VerbFr,
    en=VerbEn,
    es=VerbEs,
    it=VerbIt,
    pt=VerbPt,
    ro=VerbRo,
)


class Conjugator:
    """
    | This is the main class of the project.
    | The class manages the Verbiste data set and provides an interface with the scikit-learn pipeline.
    | If no parameters are provided, the default language is set to french and the pre-trained french conjugation pipeline is used.
    | The class defines the method conjugate(verbs, subject) which is the main method of the module.

    :param language: string.
        Language of the conjugator. The default language is 'fr' for french.
    :param model: mlconjug3.Model or None.
        A user provided model if the user has trained his own pipeline.
        When no model is given, the pre-trained model bundled for ``language`` is loaded.
        A supplied model must be a ``Model`` instance, otherwise set_model() raises ValueError.
    :ivar language: string. Language of the conjugator.
    :ivar model: mlconjug3.Model. The model used to predict the conjugation class of unknown verbs.
    :ivar conjug_manager: Verbiste object.

    """

    def __init__(self, language="fr", model=None):
        self.language = language
        self.conjug_manager = Verbiste(language=language)
        if not model:
            # No model supplied: fall back to the pre-trained model shipped with the
            # package. Both the resource stream and the zip archive are context-managed
            # so their handles are released as soon as the model has been loaded.
            with pkg_resources.resource_stream(
                RESOURCE_PACKAGE, PRE_TRAINED_MODEL_PATH[language]
            ) as stream, ZipFile(stream) as content:
                with content.open(
                    "trained_model-{}-final.pickle".format(self.language), "r"
                ) as archive:
                    model = joblib.load(archive)
        if model:
            self.set_model(model)
        else:
            self.model = model

    def __repr__(self):
        return "{}.{}(language={})".format(
            __name__, self.__class__.__name__, self.language
        )

    def conjugate(self, verbs, subject="abbrev"):
        """
        Conjugate one verb, or several verbs using multi-processing.

        :param verbs: string or iterable of strings.
            Verbs to conjugate. A single string is conjugated directly in the current process.
            Any other iterable (list, tuple, generator...) is conjugated in a process pool.
        :param subject: string.
            Toggles abbreviated or full pronouns.
            The default value is 'abbrev'.
            Select 'pronoun' for full pronouns.
        :return: Verb object or None when ``verbs`` is a string,
            otherwise a list containing a Verb object or None for each supplied verb, in order.

        """
        if isinstance(verbs, str):
            # If only a single verb is passed, call the _conjugate method directly
            return self._conjugate(verbs, subject)
        # Materialise the input so that generators and other one-shot iterables work.
        verbs = list(verbs)
        if not verbs:
            # Nothing to conjugate: do not spawn worker processes for an empty batch.
            return []
        with ProcessPoolExecutor() as executor:
            return list(
                executor.map(self._conjugate, verbs, [subject] * len(verbs))
            )

    @lru_cache(maxsize=1024)
    def _conjugate(self, verb, subject="abbrev"):
        """
        | This is the main method of this class.
        | It first checks to see if the verb is in Verbiste.
        | If it is not, and a pre-trained scikit-learn pipeline has been supplied, the method then calls the pipeline to predict the conjugation class of the provided verb.
        | Returns a Verb object or None.
        | Results are memoised by lru_cache, so repeated calls with the same arguments return the same object.

        :param verb: string.
            Verb to conjugate.
        :param subject: string.
            Toggles abbreviated or full pronouns.
            The default value is 'abbrev'.
            Select 'pronoun' for full pronouns.
        :return verb: Verb object or None.

        """
        verb = verb.lower()
        if not self.conjug_manager.is_valid_verb(verb):
            logger.warning(
                _("The supplied word: {0} is not a valid verb in {1}.").format(
                    verb, LANGUAGE_FULL[self.language]
                )
            )
            return None
        # A verb absent from the conjugation table has to be predicted by the model.
        predicted = verb not in self.conjug_manager.verbs
        if predicted:
            if self.model is None:
                logger.warning(
                    _("Please provide an instance of a mlconjug3.mlconjug3.Model")
                )
                logger.warning(
                    _(
                        "The supplied word: {0} is not in the conjugation {1} table and no Conjugation Model was provided."
                    ).format(verb, LANGUAGE_FULL[self.language])
                )
                return None
            prediction = self.model.predict([verb])[0]
            prediction_score = self.model.pipeline.predict_proba([verb])[0][prediction]
            template = self.conjug_manager.templates[prediction]
            # The root is the verb minus as many trailing characters as the template
            # has after its ':' separator (none are removed when that part is empty).
            ending_length = len(template[template.index(":") + 1 :])
            root = verb[:-ending_length] if ending_length else verb
            verb_info = VerbInfo(verb, root, template)
        else:
            verb_info = self.conjug_manager.get_verb_info(verb)
            if verb_info is None:
                return None
        conjug_info = self.conjug_manager.get_conjug_info(verb_info.template)
        if conjug_info is None:
            return None
        if predicted:
            verb_object = VERBS[self.language](
                verb_info, conjug_info, subject, predicted
            )
            verb_object.confidence_score = round(prediction_score, 3)
        else:
            verb_object = VERBS[self.language](verb_info, conjug_info, subject)
        return verb_object

    def set_model(self, model):
        """
        Assigns the provided pre-trained model to be able to conjugate unknown verbs.

        Memoised conjugations are discarded, because they may contain predictions
        made by the previously assigned model.

        :param model: mlconjug3.Model.
        :raises: ValueError if ``model`` is not a ``Model`` instance.

        """
        if not isinstance(model, Model):
            message = _("Please provide an instance of a mlconjug3.mlconjug3.Model")
            logger.warning(message)
            raise ValueError(message)
        self.model = model
        # The lru_cache wrapper lives on the class and is shared by every instance,
        # so this drops all memoised entries; they are rebuilt on demand.
        cache_clear = getattr(self._conjugate, "cache_clear", None)
        if cache_clear is not None:
            cache_clear()
if __name__ == "__main__":
    # Nothing is executed when the module is run as a script.
    pass