Compare commits
7 Commits
7fbe57c05d
...
master
| Author | SHA1 | Date | |
|---|---|---|---|
|
3a3905f2c6
|
|||
|
b0cf963f91
|
|||
|
bad1211c29
|
|||
|
2bde3a5708
|
|||
|
5eda925885
|
|||
|
b9e08735a8
|
|||
|
ee7b6cc02c
|
45
conmorfeusz/conmorfeusz/cli/__init__.py
Normal file
45
conmorfeusz/conmorfeusz/cli/__init__.py
Normal file
@@ -0,0 +1,45 @@
|
||||
import os
|
||||
import click
|
||||
import subprocess
|
||||
import conmorfeusz.web as web
|
||||
import conmorfeusz.concraft as ccpl
|
||||
from conmorfeusz.web import create_app
|
||||
from conmorfeusz.server import StandaloneApplication
|
||||
|
||||
|
||||
@click.command()
@click.option("--host", default="0.0.0.0", help='Default host on which the application will be listening on.')
@click.option('--port', default=5000, help='Port which the application will be listening on.')
@click.option("--workers", default=4, help='Number of workers used to handle incoming requests.')
@click.option('--ccpl-port', default=3000, help='Port which internal concraft-pl server will be listening on.')
@click.option('--ccpl-model', default=None, help='Model used by concraft-pl. It will fallback to CONCRAFT_PL_MODEL envvar if not specified.')
@click.option('--ccpl-bin', default=None, help='Concraft-pl binary file. It will fallback to CONCRAFT_PL_BIN envvar if not specified.')
@click.option("--ccpl-rts", default="-A4M -N4", help='Runtime system configuration for Concraft-pl VM.')
def run(host, port, workers, ccpl_port, ccpl_model, ccpl_bin, ccpl_rts):
    """
    Glues all services together and starts the holistic app.
    """
    # NOTE: the docstring above is kept short on purpose; click uses it as the
    # command's --help text, so implementation notes live in comments instead.

    # Explicit CLI options win; the environment variables are only a fallback.
    ccpl_model = ccpl_model or os.environ.get('CONCRAFT_PL_MODEL')
    ccpl_bin = ccpl_bin or os.environ.get('CONCRAFT_PL_BIN')

    # Report missing configuration as a usage error rather than a bare
    # Exception. UsageError is still an Exception subclass, so existing
    # `except Exception` callers keep working. Empty strings (e.g. an
    # exported-but-empty environment variable) count as "not specified".
    if not ccpl_model:
        raise click.UsageError("Model must be specified either by --ccpl-model option or by CONCRAFT_PL_MODEL environment variable")

    if not ccpl_bin:
        raise click.UsageError("Concraft-pl binary file must be specified either by --ccpl-bin option or by CONCRAFT_PL_BIN environment variable")

    # Positional arguments for ccpl.start_server; the runtime-system option
    # string is split on whitespace into separate arguments.
    concraft_cfg = (ccpl_model, ccpl_bin, ccpl_port, ccpl_rts.split())

    # The web application runs inside the concraft server context, so that
    # context stays open for as long as the application is serving.
    with ccpl.start_server(*concraft_cfg):
        options = {
            'bind': f'{host}:{port}',
            'workers': workers,
        }

        # Handed to create_app so the web layer knows the concraft-pl port.
        app_config = {
            'CONCRAFT_PL_PORT': ccpl_port,
        }

        StandaloneApplication(create_app, options, app_config).run()
|
||||
|
||||
@@ -86,7 +86,7 @@ class Concraft(object):
|
||||
|
||||
class Server(object):
|
||||
def __init__(self, model_path, concraft_path="concraft-pl", port=3000,
|
||||
core_num=1, allocation_size=64):
|
||||
runtimeArgs=[]):
|
||||
"""
|
||||
Start a Concraft-pl server instance in the background.
|
||||
|
||||
@@ -105,8 +105,7 @@ class Server(object):
|
||||
"""
|
||||
self.port = port
|
||||
self.concraft_server = Popen([concraft_path, 'server',
|
||||
'--port={}'.format(port), '-i', model_path, '+RTS',
|
||||
'-N{}'.format(core_num), '-A{}M'.format(allocation_size),],
|
||||
'--port={}'.format(port), '-i', model_path, '+RTS', *runtimeArgs],
|
||||
stdin=PIPE, stdout=PIPE, stderr=PIPE)
|
||||
# print(u"Concraft model " + model_path + u" loading...")
|
||||
loaded = False
|
||||
|
||||
@@ -1,14 +1,5 @@
|
||||
import os
|
||||
from . import runner
|
||||
from . import cli
|
||||
|
||||
def main():
|
||||
exe = os.environ.get('CONCRAFT_PL_BIN')
|
||||
model = os.environ.get('CONCRAFT_PL_MODEL')
|
||||
port=3000
|
||||
core_num=1
|
||||
allocation_size=64
|
||||
|
||||
concraft_cfg = (model, exe, port, core_num, allocation_size)
|
||||
runner.run(concraft_cfg)
|
||||
|
||||
cli.run()
|
||||
|
||||
10
conmorfeusz/conmorfeusz/morfeusz/__init__.py
Normal file
10
conmorfeusz/conmorfeusz/morfeusz/__init__.py
Normal file
@@ -0,0 +1,10 @@
|
||||
import morfeusz2
|
||||
|
||||
class Morfeusz:
    """
    Holder for a single, lazily created morfeusz2 analyser.

    The analyser is built on the first call to get() and the same object
    is handed back on every later call.
    """

    # Cached analyser; stays None until get() is called for the first time.
    _instance = None

    @classmethod
    def get(cls):
        """Return the cached analyser, creating it with expand_tags=True if needed."""
        analyser = cls._instance
        if analyser is None:
            analyser = morfeusz2.Morfeusz(expand_tags=True)
            cls._instance = analyser
        return analyser
|
||||
@@ -1,9 +0,0 @@
|
||||
import conmorfeusz.web as web
|
||||
import conmorfeusz.concraft as cc
|
||||
|
||||
def run(concraft):
|
||||
"""
|
||||
Glues all services together and starts the holistic app.
|
||||
"""
|
||||
with cc.start_server(*concraft):
|
||||
web.start()
|
||||
16
conmorfeusz/conmorfeusz/server/__init__.py
Normal file
16
conmorfeusz/conmorfeusz/server/__init__.py
Normal file
@@ -0,0 +1,16 @@
|
||||
from gunicorn.app.base import BaseApplication
|
||||
|
||||
class StandaloneApplication(BaseApplication):
    """
    Gunicorn application that obtains its WSGI app from a factory callable.

    app_factory is called with app_config to build the application;
    options holds gunicorn settings keyed by setting name.
    """

    def __init__(self, app_factory, options=None, app_config=None):
        # All attributes are assigned before delegating to the base class.
        # NOTE(review): presumably BaseApplication.__init__ triggers
        # load_config(), which reads self.options - confirm against gunicorn.
        self.app_factory = app_factory
        self.app_config = app_config if app_config else {}
        self.options = options if options else {}
        super().__init__()

    def load_config(self):
        """Copy every recognised, non-None option into the gunicorn config."""
        for name, setting in self.options.items():
            if setting is None or name not in self.cfg.settings:
                continue
            self.cfg.set(name.lower(), setting)

    def load(self):
        """Build the application by calling the factory with the app config."""
        factory = self.app_factory
        return factory(self.app_config)
|
||||
@@ -1,51 +1,22 @@
|
||||
import morfeusz2
|
||||
from conmorfeusz import concraft
|
||||
from typing import Dict, List
|
||||
|
||||
_TAG_SPECS: Dict[str, List[str]] = {
|
||||
"adv": ["degree"],
|
||||
"imps": ["aspect"],
|
||||
"inf": ["aspect"],
|
||||
"pant": ["aspect"],
|
||||
"pcon": ["aspect"],
|
||||
"qub": ["vocalicity"],
|
||||
"prep": ["case", "vocalicity"],
|
||||
"siebie": ["case"],
|
||||
"subst": ["number", "case", "gender"],
|
||||
"depr": ["number", "case", "gender"],
|
||||
"ger": ["number", "case", "gender", "aspect", "negation"],
|
||||
"ppron12": ["number", "case", "gender", "person", "accentability"],
|
||||
"ppron3": [
|
||||
"number",
|
||||
"case",
|
||||
"gender",
|
||||
"person",
|
||||
"accentability",
|
||||
"postprepositionality",
|
||||
],
|
||||
"num": ["number", "case", "gender", "accommodability"],
|
||||
"numcol": ["number", "case", "gender", "accommodability"],
|
||||
"adj": ["number", "case", "gender", "degree"],
|
||||
"pact": ["number", "case", "gender", "aspect", "negation"],
|
||||
"ppas": ["number", "case", "gender", "aspect", "negation"],
|
||||
"winien": ["number", "gender", "aspect"],
|
||||
"praet": ["number", "gender", "aspect", "agglutination"],
|
||||
"bedzie": ["number", "person", "aspect"],
|
||||
"fin": ["number", "person", "aspect"],
|
||||
"impt": ["number", "person", "aspect"],
|
||||
"aglt": ["number", "person", "aspect", "vocalicity"],
|
||||
}
|
||||
from conmorfeusz import morfeusz, concraft, utils
|
||||
|
||||
|
||||
def parse_tags(tags: str) -> Dict[str, str | None]:
|
||||
type_, *rest = tags.split(":")
|
||||
fields = _TAG_SPECS.get(type_, [])
|
||||
padded_values = rest + [None] * (len(fields) - len(rest))
|
||||
parsed = {field: value for field, value in zip(fields, padded_values)}
|
||||
def analysis_entry_to_dict(entry):
|
||||
start, end, morf = entry
|
||||
segment, lemma, tags, frequency, qualifiers = morf
|
||||
|
||||
return {"type": type_, **parsed}
|
||||
return {
|
||||
"start": start,
|
||||
"end": end,
|
||||
"segment": segment,
|
||||
"lemma": lemma,
|
||||
"tags": utils.parse_tags(tags),
|
||||
"frequency": frequency,
|
||||
"qualifiers": qualifiers,
|
||||
}
|
||||
|
||||
def entry_to_dict(entry):
|
||||
|
||||
def disamb_entry_to_dict(entry):
|
||||
start, end, morf, prob, eos, disamb = entry
|
||||
segment, lemma, tags, frequency, qualifiers = morf
|
||||
|
||||
@@ -54,7 +25,7 @@ def entry_to_dict(entry):
|
||||
"end": end,
|
||||
"segment": segment,
|
||||
"lemma": lemma,
|
||||
"tags": parse_tags(tags),
|
||||
"tags": utils.parse_tags(tags),
|
||||
"frequency": frequency,
|
||||
"qualifiers": qualifiers,
|
||||
"prob": float(prob),
|
||||
@@ -63,8 +34,13 @@ def entry_to_dict(entry):
|
||||
}
|
||||
|
||||
|
||||
def analyse(text):
|
||||
morf = morfeusz2.Morfeusz(expand_tags=True)
|
||||
conc = concraft.Concraft()
|
||||
analysis = morf.analyse(text)
|
||||
return [entry_to_dict(e) for e in conc.disamb(analysis) if len(e) == 6]
|
||||
def analyse(text, disamb, port):
    """
    Analyse text with Morfeusz and optionally disambiguate it with Concraft.

    Args:
        text: Text handed to the shared Morfeusz analyser.
        disamb: When falsy, the raw Morfeusz analysis is returned and
            Concraft is not contacted at all.
        port: Port passed to the Concraft client used for disambiguation.

    Returns:
        A list of dictionaries, one per analysis entry. Only 3-element
        entries are kept without disambiguation, and only 6-element entries
        are kept with it.
    """
    morf = morfeusz.Morfeusz.get()
    analysis = morf.analyse(text)

    # Truthiness test instead of `== False`, so a missing/None flag
    # (e.g. JSON null) also skips disambiguation.
    if not disamb:
        return [analysis_entry_to_dict(e) for e in analysis if len(e) == 3]

    # The Concraft client is only created when disambiguation is requested.
    conc = concraft.Concraft(port=port)

    return [disamb_entry_to_dict(e) for e in conc.disamb(analysis) if len(e) == 6]
|
||||
17
conmorfeusz/conmorfeusz/service/generator.py
Normal file
17
conmorfeusz/conmorfeusz/service/generator.py
Normal file
@@ -0,0 +1,17 @@
|
||||
from conmorfeusz import morfeusz, utils
|
||||
|
||||
def entry_to_dict(entry):
    """
    Turn one 5-element generator entry into a dictionary.

    The entry is unpacked as (segment, lemma, tags, frequency, qualifiers);
    the tag string is expanded with utils.parse_tags, every other value is
    copied through unchanged.
    """
    form, base, raw_tags, freq, quals = entry

    record = {"segment": form, "lemma": base}
    record["tags"] = utils.parse_tags(raw_tags)
    record["frequency"] = freq
    record["qualifiers"] = quals
    return record
|
||||
|
||||
|
||||
def generate(lemma):
    """
    Generate entries for a lemma using the shared Morfeusz analyser.

    Returns a list with one dictionary (see entry_to_dict) per non-empty
    entry produced by the analyser's generate() call.
    """
    generator = morfeusz.Morfeusz.get()
    # Empty entries are dropped before conversion.
    non_empty = (item for item in generator.generate(lemma) if len(item) > 0)
    return [entry_to_dict(item) for item in non_empty]
|
||||
44
conmorfeusz/conmorfeusz/utils/__init__.py
Normal file
44
conmorfeusz/conmorfeusz/utils/__init__.py
Normal file
@@ -0,0 +1,44 @@
|
||||
from typing import Dict, List
|
||||
|
||||
_TAG_SPECS: Dict[str, List[str]] = {
|
||||
"adv": ["degree"],
|
||||
"imps": ["aspect"],
|
||||
"inf": ["aspect"],
|
||||
"pant": ["aspect"],
|
||||
"pcon": ["aspect"],
|
||||
"qub": ["vocalicity"],
|
||||
"prep": ["case", "vocalicity"],
|
||||
"siebie": ["case"],
|
||||
"subst": ["number", "case", "gender"],
|
||||
"depr": ["number", "case", "gender"],
|
||||
"ger": ["number", "case", "gender", "aspect", "negation"],
|
||||
"ppron12": ["number", "case", "gender", "person", "accentability"],
|
||||
"ppron3": [
|
||||
"number",
|
||||
"case",
|
||||
"gender",
|
||||
"person",
|
||||
"accentability",
|
||||
"postprepositionality",
|
||||
],
|
||||
"num": ["number", "case", "gender", "accommodability"],
|
||||
"numcol": ["number", "case", "gender", "accommodability"],
|
||||
"adj": ["number", "case", "gender", "degree"],
|
||||
"pact": ["number", "case", "gender", "aspect", "negation"],
|
||||
"ppas": ["number", "case", "gender", "aspect", "negation"],
|
||||
"winien": ["number", "gender", "aspect"],
|
||||
"praet": ["number", "gender", "aspect", "agglutination"],
|
||||
"bedzie": ["number", "person", "aspect"],
|
||||
"fin": ["number", "person", "aspect"],
|
||||
"impt": ["number", "person", "aspect"],
|
||||
"aglt": ["number", "person", "aspect", "vocalicity"],
|
||||
}
|
||||
|
||||
|
||||
def parse_tags(tags: str) -> Dict[str, str | None]:
|
||||
type_, *rest = tags.split(":")
|
||||
fields = _TAG_SPECS.get(type_, [])
|
||||
padded_values = rest + [None] * (len(fields) - len(rest))
|
||||
parsed = {field: value for field, value in zip(fields, padded_values)}
|
||||
|
||||
return {"type": type_, **parsed}
|
||||
@@ -1,16 +1,18 @@
|
||||
from flask import Flask
|
||||
from conmorfeusz.web import analyzer
|
||||
from . import analyzer, generator
|
||||
|
||||
def create_app():
|
||||
def create_app(config=None):
|
||||
app = Flask(__name__, instance_relative_config=True)
|
||||
|
||||
app.config.from_mapping(
|
||||
SECRET_KEY='dev'
|
||||
SECRET_KEY='dev',
|
||||
CONCRAFT_PL_PORT=3000,
|
||||
)
|
||||
|
||||
if config:
|
||||
app.config.update(config)
|
||||
|
||||
app.register_blueprint(analyzer.bp)
|
||||
app.register_blueprint(generator.bp)
|
||||
|
||||
return app
|
||||
|
||||
def start():
|
||||
app = create_app()
|
||||
app.run()
|
||||
@@ -1,4 +1,4 @@
|
||||
from flask import Blueprint, request
|
||||
from flask import Blueprint, request, current_app
|
||||
from conmorfeusz.service import analyzer as service
|
||||
|
||||
bp = Blueprint('analyser', __name__, url_prefix='/analyser')
|
||||
@@ -8,5 +8,5 @@ def analyze():
|
||||
data = request.get_json()
|
||||
|
||||
return {
|
||||
"analysis": service.analyse(data["text"])
|
||||
"analysis": service.analyse(data["text"], data["disamb"] if "disamb" in data else False, current_app.config['CONCRAFT_PL_PORT'])
|
||||
}
|
||||
12
conmorfeusz/conmorfeusz/web/generator.py
Normal file
12
conmorfeusz/conmorfeusz/web/generator.py
Normal file
@@ -0,0 +1,12 @@
|
||||
from flask import Blueprint, request, current_app
|
||||
from conmorfeusz.service import generator as service
|
||||
|
||||
bp = Blueprint('generator', __name__, url_prefix='/generator')
|
||||
|
||||
@bp.post("/generate")
def analyze():
    """
    Handle POST /generator/generate.

    Expects a JSON object with a "lemma" field and responds with
    {"output": [...]} holding the entries produced by the generator service.
    A body that is not a JSON object, or that lacks "lemma", is answered
    with HTTP 400 and an "error" message.
    """
    # NOTE(review): the view name looks copied from the analyser blueprint; it
    # is kept because renaming it would change the Flask endpoint name.
    data = request.get_json()

    # Reject malformed payloads explicitly so they do not surface as a
    # KeyError/TypeError (HTTP 500) from the lookup below.
    if not isinstance(data, dict) or "lemma" not in data:
        return {"error": 'Request body must be a JSON object with a "lemma" field.'}, 400

    return {
        "output": service.generate(data["lemma"])
    }
|
||||
@@ -1,13 +1,12 @@
|
||||
{
|
||||
self,
|
||||
system,
|
||||
pkgs,
|
||||
fetchurl,
|
||||
libgcc,
|
||||
stdenv,
|
||||
autoPatchelfHook,
|
||||
makeWrapper,
|
||||
defaultModel ? self.packages.${system}.concraft-pl-sgjp-model,
|
||||
defaultModel ? self.packages.${stdenv.hostPlatform.system}.concraft-pl-sgjp-model,
|
||||
...
|
||||
}: let
|
||||
python = pkgs.python311; # Możesz zmienić wersję Python
|
||||
@@ -48,6 +47,8 @@ in
|
||||
morfeusz2
|
||||
flask
|
||||
requests
|
||||
click
|
||||
gunicorn
|
||||
];
|
||||
|
||||
nativeBuildInputs = with pythonPackages; [
|
||||
@@ -59,6 +60,6 @@ in
|
||||
postInstall = ''
|
||||
wrapProgram $out/bin/conmorfeusz \
|
||||
--set CONCRAFT_PL_MODEL "${defaultModel}" \
|
||||
--set CONCRAFT_PL_BIN "${self.packages.${system}.concraft-pl}/bin/concraft-pl"
|
||||
--set CONCRAFT_PL_BIN "${self.packages.${stdenv.hostPlatform.system}.concraft-pl}/bin/concraft-pl"
|
||||
'';
|
||||
}
|
||||
|
||||
@@ -11,7 +11,9 @@ requires-python = "==3.6"
|
||||
dependencies = [
|
||||
"morfeusz2==1.99.12",
|
||||
"flask",
|
||||
"requests"
|
||||
"requests",
|
||||
"click",
|
||||
"gunicorn>=21.0.0"
|
||||
]
|
||||
|
||||
[tool.setuptools.packages.find]
|
||||
|
||||
@@ -28,5 +28,11 @@
|
||||
drv = conmorfeusz;
|
||||
name = "conmorfeusz"; # Nazwa skryptu/modułu do uruchomienia
|
||||
};
|
||||
});
|
||||
})
|
||||
// {
|
||||
nixosModules = rec {
|
||||
conmorfeusz = import ./module.nix self;
|
||||
default = conmorfeusz;
|
||||
};
|
||||
};
|
||||
}
|
||||
|
||||
115
module.nix
Normal file
115
module.nix
Normal file
@@ -0,0 +1,115 @@
|
||||
# module.nix
|
||||
# NixOS module for conmorfeusz. Takes the flake's `self` first so the module
# can refer to the packages built by this flake.
self: {
  config,
  lib,
  pkgs,
  ...
}: let
  cfg = config.services.conmorfeusz;

  # Key used to select this flake's packages. Uses
  # stdenv.hostPlatform.system rather than pkgs.system, consistent with the
  # package definition.
  system = pkgs.stdenv.hostPlatform.system;
in {
  options.services.conmorfeusz = {
    enableMorfeusz = lib.mkEnableOption "Morfeusz in environment.systemPackages";

    enableConcraftPl = lib.mkEnableOption "Concraft-pl in environment.systemPackages";

    conmorfeusz = {
      enable = lib.mkEnableOption "Conmorfeusz service";

      config = lib.mkOption {
        type = lib.types.attrsOf (lib.types.oneOf [lib.types.str lib.types.int lib.types.bool]);
        default = {};
        example = {
          host = "0.0.0.0";
          port = 8888;
          workers = 4;
        };
        description = ''
          Configuration passed to conmorfeusz as CLI arguments.
          Each key will be transformed into --key value.
        '';
      };

      concraft-pl = lib.mkOption {
        type = lib.types.attrsOf (lib.types.oneOf [lib.types.str lib.types.int lib.types.bool]);
        default = {};
        # Keys must match the --ccpl-* options of the conmorfeusz CLI
        # (port, model, bin, rts).
        example = {
          port = 3000;
          bin = "\${pkgs.concraft-pl}/bin/concraft-pl";
          rts = "-A4M -N4";
        };
        description = ''
          Concraft-pl configuration passed to conmorfeusz as CLI arguments.
          Each key will be transformed into --ccpl-key value.
        '';
      };

      openFirewall = lib.mkOption {
        type = lib.types.bool;
        default = false;
        description = "Whether to open the conmorfeusz port in the firewall.";
      };

      openConcraftPlFirewall = lib.mkOption {
        type = lib.types.bool;
        default = false;
        description = "Whether to open the concraft-pl port in the firewall.";
      };
    };
  };

  config = lib.mkMerge [
    # Add morfeusz to systemPackages if enabled
    (lib.mkIf cfg.enableMorfeusz {
      environment.systemPackages = [self.packages.${system}.morfeusz];
    })

    # Add concraft-pl to systemPackages if enabled
    (lib.mkIf cfg.enableConcraftPl {
      environment.systemPackages = [self.packages.${system}.concraft-pl];
    })

    # Conmorfeusz service configuration
    (lib.mkIf cfg.conmorfeusz.enable {
      systemd.services.conmorfeusz = {
        description = "Conmorfeusz - Morfeusz + Concraft-pl service";
        wantedBy = ["multi-user.target"];

        serviceConfig = {
          Type = "simple";
          ExecStart = let
            # Options for the web application itself (--key value).
            mainArgs = lib.cli.toGNUCommandLineShell {} cfg.conmorfeusz.config;

            # Options for the embedded concraft-pl server (--ccpl-key value).
            ccplArgs =
              lib.cli.toGNUCommandLineShell {
                mkOptionName = k: "--ccpl-${k}";
              }
              cfg.conmorfeusz.concraft-pl;

            # Drop empty argument strings so no stray spaces end up in ExecStart.
            allArgs = lib.concatStringsSep " " (lib.filter (s: s != "") [mainArgs ccplArgs]);
          in "${self.packages.${system}.conmorfeusz}/bin/conmorfeusz ${allArgs}";

          Restart = "on-failure";
          RestartSec = 5;

          # Security hardening
          DynamicUser = true;
          NoNewPrivileges = true;
          ProtectSystem = "strict";
          ProtectHome = true;
          PrivateTmp = true;
        };
      };

      # Open conmorfeusz port in firewall.
      # Only an explicitly configured port is opened; nothing is opened when
      # `config.port` is left unset.
      networking.firewall.allowedTCPPorts =
        lib.mkIf cfg.conmorfeusz.openFirewall
        (lib.optional (cfg.conmorfeusz.config ? port) cfg.conmorfeusz.config.port);
    })

    # Open concraft-pl port in firewall (separate, as it can be enabled independently)
    (lib.mkIf (cfg.conmorfeusz.enable && cfg.conmorfeusz.openConcraftPlFirewall) {
      networking.firewall.allowedTCPPorts =
        lib.optional (cfg.conmorfeusz.concraft-pl ? port) cfg.conmorfeusz.concraft-pl.port;
    })
  ];
}
|
||||
Reference in New Issue
Block a user