# Source code for mlconjug3.conjug_manager.conjug_manager

"""
ConjugManager.

This module declares the code for the class ConjugManager.

More information about mlconjug3 at https://pypi.org/project/mlconjug3/
The conjugation data conforms to the JSON schema defined by mlconjug3.
"""

__author__ = "Ars-Linguistica"
__author_email__ = "diao.sekou.nlp@gmail.com"

import os
import joblib
import copy
import defusedxml.ElementTree as ET
import json
from collections import OrderedDict
import pkg_resources
from mlconjug3.constants import *
from mlconjug3.verbs import *


class ConjugManager:
    """
    This is the class handling the mlconjug3 json files.

    :param language: string.
        | The language of the conjugator. The default value is fr for French.
        | The allowed values are: fr, en, es, it, pt, ro.
    :ivar language: Language of the conjugator.
    :ivar verbs: Dictionary where the keys are verbs and the values are conjugation patterns.
    :ivar conjugations: Dictionary where the keys are conjugation patterns and the values are inflected forms.
    :ivar templates: list of string representing the conjugation templates.
    :ivar _allowed_endings: set containing the allowed endings of verbs in the target language.

    """

    def __init__(self, language="default"):
        """
        Load the verbs and conjugation templates of the requested language.

        :param language: string.
            Language code. The value ``"default"`` is mapped to ``"fr"``.
        :raises ValueError: if ``language`` is not a member of ``LANGUAGES``.

        """
        if language not in LANGUAGES:
            raise ValueError(
                _(
                    "Unsupported language.\nThe allowed languages are fr, en, es, it, pt, ro."
                )
            )
        self.language = "fr" if language == "default" else language
        self.verbs = {}
        self.conjugations = OrderedDict()
        verbs_file = pkg_resources.resource_filename(
            RESOURCE_PACKAGE, VERBS_RESOURCE_PATH[self.language]
        )
        self._load_verbs(verbs_file)
        # The allowed endings are derived from the verbs, so the verbs must be loaded first.
        self._allowed_endings = self._detect_allowed_endings()
        conjugations_file = pkg_resources.resource_filename(
            RESOURCE_PACKAGE, CONJUGATIONS_RESOURCE_PATH[self.language]
        )
        self._load_conjugations(conjugations_file)
        self.templates = sorted(self.conjugations)

    def __repr__(self):
        """
        Return ``<module>.<class>(language=<language>)``.

        """
        return f"{__name__}.{self.__class__.__name__}(language={self.language})"

    def _load_cache(self, file):
        """
        Load the pickled cache stored next to a json file, if it is up to date.

        The cache is looked up at ``<absolute path of file>.pkl``. It is only used
        when its modification time is not older than the one of the json file.

        :param file: string or path object.
            Path to the json file.
        :return: object or None.
            The content of the cache, or None when there is no usable cache.
        :raises ValueError: if the absolute path does not end with ``.json``.

        """
        file_path = os.path.abspath(file)
        if not file_path.endswith(".json"):
            raise ValueError(f"Invalid file path, expected .json file, got {file_path}")
        pkl_file = file_path + ".pkl"
        if not os.path.isfile(pkl_file):
            return None
        if os.path.getmtime(file_path) <= os.path.getmtime(pkl_file):
            # NOTE(review): joblib.load is pickle-based and can execute arbitrary code;
            # the .pkl file must only ever come from a trusted location.
            return joblib.load(pkl_file)
        # The json file is newer than the cache: the cache is stale.
        return None

    def _load_json_file(self, json_file):
        """
        Return the content of a json file, preferring its cache when usable.

        :param json_file: string or path object.
            Path to the json file.
        :return: object.
            The cached content when ``_load_cache`` returns a truthy value,
            the parsed json file otherwise.

        """
        cache = self._load_cache(json_file)
        # A missing, stale or empty cache all fall back to parsing the json file.
        if cache:
            return cache
        with open(json_file, encoding="utf-8") as file:
            return json.load(file)

    def _load_verbs(self, verbs_file):
        """
        Load and parses the verbs from the json file.

        :param verbs_file: string or path object.
            Path to the verbs json file.

        """
        self.verbs = self._load_json_file(verbs_file)

    def _load_conjugations(self, conjugations_file):
        """
        Load and parses the conjugations from the json file.

        :param conjugations_file: string or path object.
            Path to the conjugation json file.

        """
        self.conjugations = self._load_json_file(conjugations_file)

    def _detect_allowed_endings(self):
        """
        | Detects the allowed endings for verbs in the supported languages.
        | All the supported languages except for English restrict the form a verb can take.
        | As English is much more productive and varied in the morphology of its verbs, any word is allowed as a verb.

        :return allowed_endings: set.
            A set containing the allowed endings of verbs in the target language.

        """
        if self.language == "en":
            return set()
        # The ending is taken from the first space-separated word of each entry;
        # the length filter applies to the whole entry, not to that first word.
        return {verb.split(" ")[0][-2:] for verb in self.verbs if len(verb) >= 2}

    def is_valid_verb(self, verb):
        """
        | Checks if the verb is a valid verb in the given language.
        | English words are always treated as possible verbs.
        | Verbs in other languages are filtered by their endings.

        :param verb: string.
            The verb to conjugate.
        :return is_allowed: bool.
            True if the verb is a valid verb in the language. False otherwise.

        """
        if self.language == "en":
            return True
        return verb[-2:] in self._allowed_endings

    def get_verb_info(self, verb):
        """
        Gets verb information and returns a VerbInfo instance.

        :param verb: string.
            Verb to conjugate.
        :return VerbInfo: VerbInfo object or None.
            None is returned when the verb is not a key of ``self.verbs``.

        """
        if verb not in self.verbs:
            return None
        entry = self.verbs[verb]
        return VerbInfo(verb, entry["root"], entry["template"])

    def get_conjug_info(self, template):
        """
        Gets conjugation information corresponding to the given template.

        :param template: string.
            Name of the verb ending pattern.
        :return inflected_forms: OrderedDict or None.
            A deep copy of the conjugated suffixes of the template,
            or None when the template is unknown.

        """
        if template not in self.conjugations:
            return None
        # Deep copy so that callers can modify the result without altering the stored data.
        return copy.deepcopy(self.conjugations[template])
if __name__ == "__main__":
    # Nothing is executed when this module is run as a script.
    pass