Add support for generating + enable analysis without disambiguation
This commit is contained in:
@@ -1,51 +1,23 @@
|
|||||||
import morfeusz2
|
import morfeusz2
|
||||||
from conmorfeusz import concraft
|
from conmorfeusz import concraft, utils
|
||||||
from typing import Dict, List
|
|
||||||
|
|
||||||
_TAG_SPECS: Dict[str, List[str]] = {
|
|
||||||
"adv": ["degree"],
|
|
||||||
"imps": ["aspect"],
|
|
||||||
"inf": ["aspect"],
|
|
||||||
"pant": ["aspect"],
|
|
||||||
"pcon": ["aspect"],
|
|
||||||
"qub": ["vocalicity"],
|
|
||||||
"prep": ["case", "vocalicity"],
|
|
||||||
"siebie": ["case"],
|
|
||||||
"subst": ["number", "case", "gender"],
|
|
||||||
"depr": ["number", "case", "gender"],
|
|
||||||
"ger": ["number", "case", "gender", "aspect", "negation"],
|
|
||||||
"ppron12": ["number", "case", "gender", "person", "accentability"],
|
|
||||||
"ppron3": [
|
|
||||||
"number",
|
|
||||||
"case",
|
|
||||||
"gender",
|
|
||||||
"person",
|
|
||||||
"accentability",
|
|
||||||
"postprepositionality",
|
|
||||||
],
|
|
||||||
"num": ["number", "case", "gender", "accommodability"],
|
|
||||||
"numcol": ["number", "case", "gender", "accommodability"],
|
|
||||||
"adj": ["number", "case", "gender", "degree"],
|
|
||||||
"pact": ["number", "case", "gender", "aspect", "negation"],
|
|
||||||
"ppas": ["number", "case", "gender", "aspect", "negation"],
|
|
||||||
"winien": ["number", "gender", "aspect"],
|
|
||||||
"praet": ["number", "gender", "aspect", "agglutination"],
|
|
||||||
"bedzie": ["number", "person", "aspect"],
|
|
||||||
"fin": ["number", "person", "aspect"],
|
|
||||||
"impt": ["number", "person", "aspect"],
|
|
||||||
"aglt": ["number", "person", "aspect", "vocalicity"],
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
def parse_tags(tags: str) -> Dict[str, str | None]:
|
def analysis_entry_to_dict(entry):
|
||||||
type_, *rest = tags.split(":")
|
start, end, morf = entry
|
||||||
fields = _TAG_SPECS.get(type_, [])
|
segment, lemma, tags, frequency, qualifiers = morf
|
||||||
padded_values = rest + [None] * (len(fields) - len(rest))
|
|
||||||
parsed = {field: value for field, value in zip(fields, padded_values)}
|
|
||||||
|
|
||||||
return {"type": type_, **parsed}
|
return {
|
||||||
|
"start": start,
|
||||||
|
"end": end,
|
||||||
|
"segment": segment,
|
||||||
|
"lemma": lemma,
|
||||||
|
"tags": utils.parse_tags(tags),
|
||||||
|
"frequency": frequency,
|
||||||
|
"qualifiers": qualifiers,
|
||||||
|
}
|
||||||
|
|
||||||
def entry_to_dict(entry):
|
|
||||||
|
def disamb_entry_to_dict(entry):
|
||||||
start, end, morf, prob, eos, disamb = entry
|
start, end, morf, prob, eos, disamb = entry
|
||||||
segment, lemma, tags, frequency, qualifiers = morf
|
segment, lemma, tags, frequency, qualifiers = morf
|
||||||
|
|
||||||
@@ -54,7 +26,7 @@ def entry_to_dict(entry):
|
|||||||
"end": end,
|
"end": end,
|
||||||
"segment": segment,
|
"segment": segment,
|
||||||
"lemma": lemma,
|
"lemma": lemma,
|
||||||
"tags": parse_tags(tags),
|
"tags": utils.parse_tags(tags),
|
||||||
"frequency": frequency,
|
"frequency": frequency,
|
||||||
"qualifiers": qualifiers,
|
"qualifiers": qualifiers,
|
||||||
"prob": float(prob),
|
"prob": float(prob),
|
||||||
@@ -63,8 +35,13 @@ def entry_to_dict(entry):
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
def analyse(text, disamb, port):
    """Analyse *text* with Morfeusz, optionally disambiguating via Concraft.

    Args:
        text: Text passed to ``Morfeusz.analyse``.
        disamb: When falsy, the raw Morfeusz interpretations are returned and
            no Concraft client is created. When truthy, the analysis is first
            passed through ``Concraft.disamb``.
        port: Port handed to ``concraft.Concraft``; only used when *disamb*
            is truthy.

    Returns:
        A list of dicts produced by ``analysis_entry_to_dict`` (without
        disambiguation) or ``disamb_entry_to_dict`` (with disambiguation).
    """
    morf = morfeusz2.Morfeusz(expand_tags=True)
    analysis = morf.analyse(text)

    # Truthiness test instead of ``== False``: a ``None`` flag (e.g. JSON
    # null) is then treated the same as an omitted flag.
    if not disamb:
        # analysis_entry_to_dict unpacks exactly three fields per entry.
        return [analysis_entry_to_dict(e) for e in analysis if len(e) == 3]

    # The Concraft client is only constructed when it is actually needed.
    conc = concraft.Concraft(port=port)
    # disamb_entry_to_dict unpacks exactly six fields per entry.
    return [disamb_entry_to_dict(e) for e in conc.disamb(analysis) if len(e) == 6]
|
||||||
18
conmorfeusz/conmorfeusz/service/generator.py
Normal file
18
conmorfeusz/conmorfeusz/service/generator.py
Normal file
@@ -0,0 +1,18 @@
|
|||||||
|
import morfeusz2
|
||||||
|
from conmorfeusz import utils
|
||||||
|
|
||||||
|
def entry_to_dict(entry):
    """Turn one five-element generator entry into a dict.

    The entry is unpacked positionally as (segment, lemma, tags, frequency,
    qualifiers). The tag string is expanded with ``utils.parse_tags``; the
    other values are copied through unchanged. Unpacking raises
    ``ValueError`` when the entry does not hold exactly five items.
    """
    form, base, tag_string, freq, labels = entry

    result = {"segment": form, "lemma": base}
    result["tags"] = utils.parse_tags(tag_string)
    result["frequency"] = freq
    result["qualifiers"] = labels
    return result
|
||||||
|
|
||||||
|
|
||||||
|
def generate(lemma):
    """Return the forms Morfeusz generates for *lemma*, one dict per form.

    Args:
        lemma: Value passed to ``Morfeusz.generate``.

    Returns:
        A list of dicts built by ``entry_to_dict``.
    """
    morf = morfeusz2.Morfeusz(expand_tags=True)
    # entry_to_dict unpacks exactly five fields, so the guard checks for that
    # exact length (mirroring the exact-length guards used by the analyser)
    # rather than merely "non-empty".
    return [entry_to_dict(e) for e in morf.generate(lemma) if len(e) == 5]
|
||||||
44
conmorfeusz/conmorfeusz/utils/__init__.py
Normal file
44
conmorfeusz/conmorfeusz/utils/__init__.py
Normal file
@@ -0,0 +1,44 @@
|
|||||||
|
from typing import Dict, List
|
||||||
|
|
||||||
|
# Field-name runs shared by several tag types. They are always spliced with
# ``[*...]`` so every entry of _TAG_SPECS owns an independent list.
_NUMBER_CASE_GENDER = ["number", "case", "gender"]
_NUMBER_PERSON_ASPECT = ["number", "person", "aspect"]

# Maps the leading (type) token of a colon-separated tag string to the ordered
# names of the values that follow it; consumed by parse_tags.
_TAG_SPECS: Dict[str, List[str]] = {
    # Types followed by a single value.
    "adv": ["degree"],
    "imps": ["aspect"],
    "inf": ["aspect"],
    "pant": ["aspect"],
    "pcon": ["aspect"],
    "qub": ["vocalicity"],
    "prep": ["case", "vocalicity"],
    "siebie": ["case"],
    # Types whose values start with number, case, gender.
    "subst": [*_NUMBER_CASE_GENDER],
    "depr": [*_NUMBER_CASE_GENDER],
    "ger": [*_NUMBER_CASE_GENDER, "aspect", "negation"],
    "ppron12": [*_NUMBER_CASE_GENDER, "person", "accentability"],
    "ppron3": [
        *_NUMBER_CASE_GENDER,
        "person",
        "accentability",
        "postprepositionality",
    ],
    "num": [*_NUMBER_CASE_GENDER, "accommodability"],
    "numcol": [*_NUMBER_CASE_GENDER, "accommodability"],
    "adj": [*_NUMBER_CASE_GENDER, "degree"],
    "pact": [*_NUMBER_CASE_GENDER, "aspect", "negation"],
    "ppas": [*_NUMBER_CASE_GENDER, "aspect", "negation"],
    # Types with number and gender but no case.
    "winien": ["number", "gender", "aspect"],
    "praet": ["number", "gender", "aspect", "agglutination"],
    # Types whose values start with number, person, aspect.
    "bedzie": [*_NUMBER_PERSON_ASPECT],
    "fin": [*_NUMBER_PERSON_ASPECT],
    "impt": [*_NUMBER_PERSON_ASPECT],
    "aglt": [*_NUMBER_PERSON_ASPECT, "vocalicity"],
}
|
||||||
|
|
||||||
|
|
||||||
|
def parse_tags(tags: str) -> Dict[str, str | None]:
    """Split a colon-separated tag string into a dict of named values.

    The first token is stored under ``"type"``. The remaining tokens are
    matched positionally against the field names that ``_TAG_SPECS`` lists
    for that type. Fields with no corresponding token map to ``None``;
    tokens beyond the listed fields are dropped, and a type missing from
    ``_TAG_SPECS`` yields only the ``"type"`` key.
    """
    tokens = tags.split(":")
    kind = tokens[0]
    values = tokens[1:]

    result = {"type": kind}
    for position, field in enumerate(_TAG_SPECS.get(kind, [])):
        # Positions past the end of the tag string are filled with None.
        result[field] = values[position] if position < len(values) else None
    return result
|
||||||
@@ -1,5 +1,5 @@
|
|||||||
from flask import Flask
|
from flask import Flask
|
||||||
from . import analyzer
|
from . import analyzer, generator
|
||||||
|
|
||||||
def create_app(config=None):
|
def create_app(config=None):
|
||||||
app = Flask(__name__, instance_relative_config=True)
|
app = Flask(__name__, instance_relative_config=True)
|
||||||
@@ -13,5 +13,6 @@ def create_app(config=None):
|
|||||||
app.config.update(config)
|
app.config.update(config)
|
||||||
|
|
||||||
app.register_blueprint(analyzer.bp)
|
app.register_blueprint(analyzer.bp)
|
||||||
|
app.register_blueprint(generator.bp)
|
||||||
|
|
||||||
return app
|
return app
|
||||||
|
|||||||
@@ -8,5 +8,5 @@ def analyze():
|
|||||||
data = request.get_json()
|
data = request.get_json()
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"analysis": service.analyse(data["text"], current_app.config['CONCRAFT_PL_PORT'])
|
"analysis": service.analyse(data["text"], data["disamb"] if "disamb" in data else False, current_app.config['CONCRAFT_PL_PORT'])
|
||||||
}
|
}
|
||||||
12
conmorfeusz/conmorfeusz/web/generator.py
Normal file
12
conmorfeusz/conmorfeusz/web/generator.py
Normal file
@@ -0,0 +1,12 @@
|
|||||||
|
from flask import Blueprint, request, current_app
|
||||||
|
from conmorfeusz.service import generator as service
|
||||||
|
|
||||||
|
bp = Blueprint('generator', __name__, url_prefix='/generator')
|
||||||
|
|
||||||
|
@bp.post("/generate")
def analyze():
    """Handle ``POST /generator/generate``: generate forms for a lemma.

    Expects a JSON object with a ``"lemma"`` key in the request body.

    Returns:
        ``{"output": [...]}`` holding the result of ``service.generate``, or
        a 400 response with an ``"error"`` message when the body is not a
        JSON object containing ``"lemma"``.
    """
    # NOTE(review): the view is named ``analyze`` although it serves the
    # generate route; the name is kept because Flask derives the endpoint
    # name from it.
    data = request.get_json()

    # A syntactically valid JSON body can still be null, a list or a scalar;
    # indexing it (or a dict lacking the key) would raise and surface as a 500.
    if not isinstance(data, dict) or "lemma" not in data:
        return {"error": 'request body must be a JSON object with a "lemma" key'}, 400

    return {"output": service.generate(data["lemma"])}
|
||||||
Reference in New Issue
Block a user