Add support for generating + enable analysis without disambiguation

This commit is contained in:
2026-01-09 15:50:38 +01:00
parent 2bde3a5708
commit ada8ca75fa
6 changed files with 101 additions and 49 deletions

View File

@@ -1,51 +1,23 @@
import morfeusz2
from conmorfeusz import concraft
from typing import Dict, List
# Field-name prefixes shared by several tag types. They are tuples so each
# table entry below unpacks them into its own, independent list.
_NCG = ("number", "case", "gender")
_NPA = ("number", "person", "aspect")

# Maps the leading type token of a colon-separated tag string to the ordered
# names of the values that follow it. parse_tags() pairs those names with the
# values positionally; types missing from the table yield no named fields.
_TAG_SPECS: Dict[str, List[str]] = dict(
    adv=["degree"],
    imps=["aspect"],
    inf=["aspect"],
    pant=["aspect"],
    pcon=["aspect"],
    qub=["vocalicity"],
    prep=["case", "vocalicity"],
    siebie=["case"],
    subst=[*_NCG],
    depr=[*_NCG],
    ger=[*_NCG, "aspect", "negation"],
    ppron12=[*_NCG, "person", "accentability"],
    ppron3=[*_NCG, "person", "accentability", "postprepositionality"],
    num=[*_NCG, "accommodability"],
    numcol=[*_NCG, "accommodability"],
    adj=[*_NCG, "degree"],
    pact=[*_NCG, "aspect", "negation"],
    ppas=[*_NCG, "aspect", "negation"],
    winien=["number", "gender", "aspect"],
    praet=["number", "gender", "aspect", "agglutination"],
    bedzie=[*_NPA],
    fin=[*_NPA],
    impt=[*_NPA],
    aglt=[*_NPA, "vocalicity"],
)
from conmorfeusz import concraft, utils
def analysis_entry_to_dict(entry):
    """Flatten one non-disambiguated analysis entry into a plain dict.

    Args:
        entry: A 3-item sequence ``(start, end, morf)`` whose last item is a
            5-item sequence ``(segment, lemma, tags, frequency, qualifiers)``.

    Returns:
        A dict with the keys ``start``, ``end``, ``segment``, ``lemma``,
        ``tags``, ``frequency`` and ``qualifiers``; ``tags`` holds the result
        of ``utils.parse_tags`` applied to the raw tag string.

    Raises:
        ValueError: If ``entry`` or its nested item has the wrong length.
    """
    span_start, span_end, (form, base, raw_tags, freq, quals) = entry
    record = {"start": span_start, "end": span_end}
    record.update(
        segment=form,
        lemma=base,
        tags=utils.parse_tags(raw_tags),
        frequency=freq,
        qualifiers=quals,
    )
    return record
def entry_to_dict(entry):
def disamb_entry_to_dict(entry):
start, end, morf, prob, eos, disamb = entry
segment, lemma, tags, frequency, qualifiers = morf
@@ -54,7 +26,7 @@ def entry_to_dict(entry):
"end": end,
"segment": segment,
"lemma": lemma,
"tags": parse_tags(tags),
"tags": utils.parse_tags(tags),
"frequency": frequency,
"qualifiers": qualifiers,
"prob": float(prob),
@@ -63,8 +35,13 @@ def entry_to_dict(entry):
}
# Spellings of the flag that explicitly switch disambiguation off when the
# value arrives as text (for example from a URL query string).
_DISAMB_OFF_VALUES = frozenset({"0", "false", "no", "off"})


def _disamb_requested(disamb):
    """Interpret the ``disamb`` flag, which may be a bool or a string."""
    if isinstance(disamb, str):
        # Query-string values are always strings, so "false" or "0" must be
        # recognised by spelling. Any other string (including an empty one,
        # as in a bare ``?disamb``) keeps meaning "enabled".
        return disamb.strip().lower() not in _DISAMB_OFF_VALUES
    return bool(disamb)


def analyse(text, disamb, port):
    """Analyse ``text`` with Morfeusz, optionally disambiguating via Concraft.

    Args:
        text: The text passed to ``Morfeusz.analyse``.
        disamb: Whether to disambiguate. Accepts a bool or a string; the
            strings "0", "false", "no" and "off" (case-insensitive) disable
            disambiguation, any other string enables it. Other values are
            interpreted by truthiness, so ``None`` disables it.
        port: Port handed to ``concraft.Concraft``; only used when
            disambiguation is enabled.

    Returns:
        A list of dicts: ``analysis_entry_to_dict`` results for the 3-item
        analysis entries when disambiguation is off, otherwise
        ``disamb_entry_to_dict`` results for the 6-item disambiguated entries.
    """
    morf = morfeusz2.Morfeusz(expand_tags=True)
    analysis = morf.analyse(text)

    if not _disamb_requested(disamb):
        return [analysis_entry_to_dict(e) for e in analysis if len(e) == 3]

    # The Concraft client is only created when it is actually needed.
    conc = concraft.Concraft(port=port)
    return [disamb_entry_to_dict(e) for e in conc.disamb(analysis) if len(e) == 6]

View File

@@ -0,0 +1,18 @@
import morfeusz2
from conmorfeusz import utils
def entry_to_dict(entry):
    """Turn one generated entry into a plain dict.

    Args:
        entry: A 5-item sequence
            ``(segment, lemma, tags, frequency, qualifiers)``.

    Returns:
        A dict with the keys ``segment``, ``lemma``, ``tags``, ``frequency``
        and ``qualifiers``; ``tags`` holds the result of ``utils.parse_tags``
        applied to the raw tag string.

    Raises:
        ValueError: If ``entry`` does not contain exactly five items.
    """
    form, base, raw_tags, freq, quals = entry
    record = {"segment": form, "lemma": base}
    record["tags"] = utils.parse_tags(raw_tags)
    record["frequency"] = freq
    record["qualifiers"] = quals
    return record
def generate(lemma):
    """Return the entries Morfeusz generates for ``lemma`` as a list of dicts.

    A ``Morfeusz`` instance with ``expand_tags=True`` is created on each call.
    Empty entries are dropped; every remaining one is converted with
    ``entry_to_dict``.
    """
    generator = morfeusz2.Morfeusz(expand_tags=True)
    forms = []
    for raw in generator.generate(lemma):
        if len(raw) > 0:
            forms.append(entry_to_dict(raw))
    return forms

View File

@@ -0,0 +1,44 @@
from typing import Dict, List
# Field-name prefixes shared by several tag types. They are tuples so each
# table entry below unpacks them into its own, independent list.
_NCG = ("number", "case", "gender")
_NPA = ("number", "person", "aspect")

# Maps the leading type token of a colon-separated tag string to the ordered
# names of the values that follow it. parse_tags() pairs those names with the
# values positionally; types missing from the table yield no named fields.
_TAG_SPECS: Dict[str, List[str]] = dict(
    adv=["degree"],
    imps=["aspect"],
    inf=["aspect"],
    pant=["aspect"],
    pcon=["aspect"],
    qub=["vocalicity"],
    prep=["case", "vocalicity"],
    siebie=["case"],
    subst=[*_NCG],
    depr=[*_NCG],
    ger=[*_NCG, "aspect", "negation"],
    ppron12=[*_NCG, "person", "accentability"],
    ppron3=[*_NCG, "person", "accentability", "postprepositionality"],
    num=[*_NCG, "accommodability"],
    numcol=[*_NCG, "accommodability"],
    adj=[*_NCG, "degree"],
    pact=[*_NCG, "aspect", "negation"],
    ppas=[*_NCG, "aspect", "negation"],
    winien=["number", "gender", "aspect"],
    praet=["number", "gender", "aspect", "agglutination"],
    bedzie=[*_NPA],
    fin=[*_NPA],
    impt=[*_NPA],
    aglt=[*_NPA, "vocalicity"],
)
def parse_tags(tags: str) -> Dict[str, str | None]:
    """Split a colon-separated tag string into named fields.

    The first token is stored under ``"type"``. The remaining tokens are
    matched positionally against the field names ``_TAG_SPECS`` lists for
    that type: fields without a value become ``None``, values beyond the
    listed fields are dropped, and an unknown type yields only ``"type"``.
    """
    type_, *values = tags.split(":")
    result: Dict[str, str | None] = {"type": type_}
    for position, field in enumerate(_TAG_SPECS.get(type_, [])):
        result[field] = values[position] if position < len(values) else None
    return result

View File

@@ -1,5 +1,5 @@
from flask import Flask
from . import analyzer
from . import analyzer, generator
def create_app(config=None):
app = Flask(__name__, instance_relative_config=True)
@@ -13,5 +13,6 @@ def create_app(config=None):
app.config.update(config)
app.register_blueprint(analyzer.bp)
app.register_blueprint(generator.bp)
return app

View File

@@ -8,5 +8,5 @@ def analyze():
data = request.get_json()
return {
"analysis": service.analyse(data["text"], current_app.config['CONCRAFT_PL_PORT'])
"analysis": service.analyse(data["text"], request.args.get('disamb', False), current_app.config['CONCRAFT_PL_PORT'])
}

View File

@@ -0,0 +1,12 @@
from flask import Blueprint, request, current_app
from conmorfeusz.service import generator as service
# Blueprint whose routes are all served under the "/generator" URL prefix.
bp = Blueprint("generator", __name__, url_prefix="/generator")
@bp.post("/generate")
def analyze():
    """Handle ``POST /generator/generate``: generate forms for a lemma.

    Expects a JSON object body with a string ``"lemma"`` field.

    Returns:
        ``{"output": [...]}`` with the result of ``service.generate`` on
        success, or ``({"error": ...}, 400)`` when the body is not a JSON
        object or lacks a string ``"lemma"``.
    """
    # NOTE(review): the view is named "analyze" although it generates forms;
    # it is left unchanged because the endpoint name derives from it.
    data = request.get_json()
    lemma = data.get("lemma") if isinstance(data, dict) else None
    if not isinstance(lemma, str):
        # Without this check a bad payload raises KeyError/TypeError below
        # and surfaces as a 500 even though the client is at fault.
        return {
            "error": 'request body must be a JSON object with a string "lemma" field'
        }, 400
    return {
        "output": service.generate(lemma)
    }