diff --git a/conmorfeusz/conmorfeusz/service/analyzer.py b/conmorfeusz/conmorfeusz/service/analyzer.py index 6f0deab..1232f58 100644 --- a/conmorfeusz/conmorfeusz/service/analyzer.py +++ b/conmorfeusz/conmorfeusz/service/analyzer.py @@ -1,51 +1,23 @@ import morfeusz2 -from conmorfeusz import concraft -from typing import Dict, List - -_TAG_SPECS: Dict[str, List[str]] = { - "adv": ["degree"], - "imps": ["aspect"], - "inf": ["aspect"], - "pant": ["aspect"], - "pcon": ["aspect"], - "qub": ["vocalicity"], - "prep": ["case", "vocalicity"], - "siebie": ["case"], - "subst": ["number", "case", "gender"], - "depr": ["number", "case", "gender"], - "ger": ["number", "case", "gender", "aspect", "negation"], - "ppron12": ["number", "case", "gender", "person", "accentability"], - "ppron3": [ - "number", - "case", - "gender", - "person", - "accentability", - "postprepositionality", - ], - "num": ["number", "case", "gender", "accommodability"], - "numcol": ["number", "case", "gender", "accommodability"], - "adj": ["number", "case", "gender", "degree"], - "pact": ["number", "case", "gender", "aspect", "negation"], - "ppas": ["number", "case", "gender", "aspect", "negation"], - "winien": ["number", "gender", "aspect"], - "praet": ["number", "gender", "aspect", "agglutination"], - "bedzie": ["number", "person", "aspect"], - "fin": ["number", "person", "aspect"], - "impt": ["number", "person", "aspect"], - "aglt": ["number", "person", "aspect", "vocalicity"], -} +from conmorfeusz import concraft, utils -def parse_tags(tags: str) -> Dict[str, str | None]: - type_, *rest = tags.split(":") - fields = _TAG_SPECS.get(type_, []) - padded_values = rest + [None] * (len(fields) - len(rest)) - parsed = {field: value for field, value in zip(fields, padded_values)} +def analysis_entry_to_dict(entry): + start, end, morf = entry + segment, lemma, tags, frequency, qualifiers = morf - return {"type": type_, **parsed} + return { + "start": start, + "end": end, + "segment": segment, + "lemma": lemma, + "tags": utils.parse_tags(tags), + "frequency": frequency, + "qualifiers": qualifiers, + } -def entry_to_dict(entry): + +def disamb_entry_to_dict(entry): start, end, morf, prob, eos, disamb = entry segment, lemma, tags, frequency, qualifiers = morf @@ -54,7 +26,7 @@ def entry_to_dict(entry): "end": end, "segment": segment, "lemma": lemma, - "tags": parse_tags(tags), + "tags": utils.parse_tags(tags), "frequency": frequency, "qualifiers": qualifiers, "prob": float(prob), @@ -63,8 +35,13 @@ def entry_to_dict(entry): } -def analyse(text, port): +def analyse(text, disamb, port): morf = morfeusz2.Morfeusz(expand_tags=True) + analysis = morf.analyse(text) + + if disamb == False: + return [analysis_entry_to_dict(e) for e in analysis if len(e) == 3] + conc = concraft.Concraft(port=port) - analysis = morf.analyse(text) - return [entry_to_dict(e) for e in conc.disamb(analysis) if len(e) == 6] \ No newline at end of file + + return [disamb_entry_to_dict(e) for e in conc.disamb(analysis) if len(e) == 6] \ No newline at end of file diff --git a/conmorfeusz/conmorfeusz/service/generator.py b/conmorfeusz/conmorfeusz/service/generator.py new file mode 100644 index 0000000..757682a --- /dev/null +++ b/conmorfeusz/conmorfeusz/service/generator.py @@ -0,0 +1,18 @@ +import morfeusz2 +from conmorfeusz import utils + +def entry_to_dict(entry): + segment, lemma, tags, frequency, qualifiers = entry + + return { + "segment": segment, + "lemma": lemma, + "tags": utils.parse_tags(tags), + "frequency": frequency, + "qualifiers": qualifiers, + } + + +def generate(lemma): + morf = morfeusz2.Morfeusz(expand_tags=True) + return [entry_to_dict(e) for e in morf.generate(lemma) if len(e) > 0] \ No newline at end of file diff --git a/conmorfeusz/conmorfeusz/utils/__init__.py b/conmorfeusz/conmorfeusz/utils/__init__.py new file mode 100644 index 0000000..d19f5c5 --- /dev/null +++ b/conmorfeusz/conmorfeusz/utils/__init__.py @@ -0,0 +1,44 @@ +from typing import Dict, List + +_TAG_SPECS: Dict[str, List[str]] = { + "adv": ["degree"], + "imps": ["aspect"], + "inf": ["aspect"], + "pant": ["aspect"], + "pcon": ["aspect"], + "qub": ["vocalicity"], + "prep": ["case", "vocalicity"], + "siebie": ["case"], + "subst": ["number", "case", "gender"], + "depr": ["number", "case", "gender"], + "ger": ["number", "case", "gender", "aspect", "negation"], + "ppron12": ["number", "case", "gender", "person", "accentability"], + "ppron3": [ + "number", + "case", + "gender", + "person", + "accentability", + "postprepositionality", + ], + "num": ["number", "case", "gender", "accommodability"], + "numcol": ["number", "case", "gender", "accommodability"], + "adj": ["number", "case", "gender", "degree"], + "pact": ["number", "case", "gender", "aspect", "negation"], + "ppas": ["number", "case", "gender", "aspect", "negation"], + "winien": ["number", "gender", "aspect"], + "praet": ["number", "gender", "aspect", "agglutination"], + "bedzie": ["number", "person", "aspect"], + "fin": ["number", "person", "aspect"], + "impt": ["number", "person", "aspect"], + "aglt": ["number", "person", "aspect", "vocalicity"], +} + + +def parse_tags(tags: str) -> Dict[str, str | None]: + type_, *rest = tags.split(":") + fields = _TAG_SPECS.get(type_, []) + padded_values = rest + [None] * (len(fields) - len(rest)) + parsed = {field: value for field, value in zip(fields, padded_values)} + + return {"type": type_, **parsed} \ No newline at end of file diff --git a/conmorfeusz/conmorfeusz/web/__init__.py b/conmorfeusz/conmorfeusz/web/__init__.py index da43197..46dfafa 100644 --- a/conmorfeusz/conmorfeusz/web/__init__.py +++ b/conmorfeusz/conmorfeusz/web/__init__.py @@ -1,5 +1,5 @@ from flask import Flask -from . import analyzer +from . import analyzer, generator def create_app(config=None): app = Flask(__name__, instance_relative_config=True) @@ -13,5 +13,6 @@ def create_app(config=None): app.config.update(config) app.register_blueprint(analyzer.bp) + app.register_blueprint(generator.bp) return app diff --git a/conmorfeusz/conmorfeusz/web/analyzer.py b/conmorfeusz/conmorfeusz/web/analyzer.py index b8bb739..3dc4402 100644 --- a/conmorfeusz/conmorfeusz/web/analyzer.py +++ b/conmorfeusz/conmorfeusz/web/analyzer.py @@ -8,5 +8,5 @@ def analyze(): data = request.get_json() return { - "analysis": service.analyse(data["text"], current_app.config['CONCRAFT_PL_PORT']) + "analysis": service.analyse(data["text"], data["disamb"] if "disamb" in data else False, current_app.config['CONCRAFT_PL_PORT']) } \ No newline at end of file diff --git a/conmorfeusz/conmorfeusz/web/generator.py b/conmorfeusz/conmorfeusz/web/generator.py new file mode 100644 index 0000000..fc26501 --- /dev/null +++ b/conmorfeusz/conmorfeusz/web/generator.py @@ -0,0 +1,12 @@ +from flask import Blueprint, request, current_app +from conmorfeusz.service import generator as service + +bp = Blueprint('generator', __name__, url_prefix='/generator') + +@bp.post("/generate") +def analyze(): + data = request.get_json() + + return { + "output": service.generate(data["lemma"]) + } \ No newline at end of file