diff --git a/concraft-pl-sgjp-model/default.nix b/concraft-pl-sgjp-model/default.nix
new file mode 100644
index 0000000..d268179
--- /dev/null
+++ b/concraft-pl-sgjp-model/default.nix
@@ -0,0 +1,5 @@
+{fetchurl, ...}:
+fetchurl {
+  url = "https://zil.ipipan.waw.pl/Concraft?action=AttachFile&do=get&target=concraft-pl-model-SGJP-20220221.gz";
+  hash = "sha256-VcvdSkJwUhAgHroA0d/bH3QDjjO/2x8HqSuUvRgIN/4=";
+}
diff --git a/conmorfeusz/conmorfeusz/concraft/__init__.py b/conmorfeusz/conmorfeusz/concraft/__init__.py
new file mode 100644
index 0000000..c834814
--- /dev/null
+++ b/conmorfeusz/conmorfeusz/concraft/__init__.py
@@ -0,0 +1,147 @@
+#! /usr/bin/python
+# -*- coding: utf-8 -*-
+#
+# Authors:
+# * Witek Kieraś
+# * Kuba Waszczuk
+
+
+import requests
+import json
+import time
+from subprocess import Popen, PIPE, TimeoutExpired
+from contextlib import contextmanager
+
+@contextmanager
+def start_server(*args):
+    """
+    Allows to start the server using 'with' statement.
+    """
+    server = Server(*args)
+    try:
+        yield server
+    finally:
+        server.terminate()
+
+class Concraft(object):
+    def __init__(self, server_addr='http://localhost', port=3000):
+        """
+        Parameters
+        ----------
+        server_addr : url
+            Address of the Concraft-pl server
+        port : int
+            Port number used by the Concraft-pl server
+        """
+        self.server_addr = server_addr + ":{}/parse".format(port)
+
+    def dag_to_str(self, morf_dag):
+        """
+        Convert a DAG in the Morfeusz-compliant format to a DAG in the
+        Concraft-compliant format.
+        """
+        conc_dag = ''
+        for item in morf_dag:
+            num1, num2, (forma, lemat, tag, posp, kwal) = item
+            line_string = '\t'.join((str(num1), str(num2), forma, lemat, tag, ','.join(posp), ','.join(kwal), '0.0', '', '', '' + '\n'))
+            conc_dag += line_string
+        return conc_dag
+
+    def str_to_dag(self, dag_str):
+        """
+        Reverse of `dag_to_str`.
+        """
+        analyse_list = []
+        for line in dag_str.split('\n'):
+            if line != '':
+                num1, num2, forma, lemat, tag, posp, kwal, prob, interp_meta, eos, seg_meta, disamb = line.strip('\n').split('\t')
+                eos = 'eos' if eos else None
+                disamb = 'disamb' if disamb else None
+                posp = posp.split(',') if posp else []
+                kwal = kwal.split(',') if kwal else []
+                analyse_list.append((int(num1), int(num2), (forma, lemat, tag, posp, kwal), prob, eos, disamb))
+            else:
+                analyse_list.append("")
+        return analyse_list
+
+    def disamb_str(self, dag):
+        """
+        Disambiguate a DAG represented as a string in the Concraft-compliant
+        format (tab separated string with one arc represented per line).
+        """
+        # TODO: only add '\n' if necessary!
+        request_data = {'dag':dag + '\n'}
+        r = requests.post(self.server_addr, data=json.dumps(request_data))
+        # Fail loudly on an HTTP error status instead of on the JSON lookup below.
+        r.raise_for_status()
+        return r.json()['dag']
+
+    def disamb(self, dag):
+        """
+        Disambiguate a DAG represented in the Morfeusz-compliant format.
+        """
+        dag_str = self.dag_to_str(dag)
+        dag_result = self.disamb_str(dag_str)
+        return self.str_to_dag(dag_result)
+
+
+class Server(object):
+    def __init__(self, model_path, concraft_path="concraft-pl", port=3000,
+            core_num=1, allocation_size=64):
+        """
+        Start a Concraft-pl server instance in the background.
+
+        Parameters
+        ----------
+        model_path : path
+            Path to a Concraft-pl model
+        concraft_path : path
+            Path to a Concraft-pl executable
+        port : int
+            Port number to be used to run a Concraft-pl server instance
+        core_num : int
+            Number of processor cores to use
+        allocation_size : int
+            Allocation area size (in MBs) of the garbage collector
+
+        Raises
+        ------
+        RuntimeError
+            If the server process exits before it accepts connections
+        """
+        self.port = port
+        # NOTE: stdout/stderr are piped but never read here; a very chatty
+        # server could eventually block on a full pipe buffer.
+        self.concraft_server = Popen([concraft_path, 'server',
+            '--port={}'.format(port), '-i', model_path, '+RTS',
+            '-N{}'.format(core_num), '-A{}M'.format(allocation_size),],
+            stdin=PIPE, stdout=PIPE, stderr=PIPE)
+        # print(u"Concraft model " + model_path + u" loading...")
+        loaded = False
+        while not loaded:
+            # Give up if the process has already exited instead of
+            # polling a dead server forever.
+            exit_code = self.concraft_server.poll()
+            if exit_code is not None:
+                raise RuntimeError(
+                    "concraft-pl server exited with code {} before "
+                    "accepting connections".format(exit_code))
+            try:
+                request_data = {'dag':''}
+                requests.post('http://localhost:{}/parse'.format(port),
+                        data=json.dumps(request_data))
+                loaded = True
+                #print(u"loaded!")
+            except requests.ConnectionError:
+                #print(u"loading…")
+                time.sleep(1)
+
+    def terminate(self):
+        """Terminate the Concraft-pl server and reap the child process."""
+        self.concraft_server.terminate()
+        try:
+            self.concraft_server.wait(timeout=10)
+        except TimeoutExpired:
+            # The server ignored the termination request; force it down.
+            self.concraft_server.kill()
+            self.concraft_server.wait()
diff --git a/conmorfeusz/conmorfeusz/main.py b/conmorfeusz/conmorfeusz/main.py
index 8fcbee7..6d325e7 100644
--- a/conmorfeusz/conmorfeusz/main.py
+++ b/conmorfeusz/conmorfeusz/main.py
@@ -1,4 +1,16 @@
-import conmorfeusz.web as web
+import os
+from . import runner
 
 def main():
-    web.start()
\ No newline at end of file
+    """Configure Concraft-pl from the environment and start the app."""
+    # Fall back to `concraft-pl` on PATH when no binary is configured.
+    exe = os.environ.get('CONCRAFT_PL_BIN', 'concraft-pl')
+    model = os.environ.get('CONCRAFT_PL_MODEL')
+    if not model:
+        raise SystemExit('CONCRAFT_PL_MODEL must point to a Concraft-pl model')
+    port=3000
+    core_num=1
+    allocation_size=64
+
+    concraft_cfg = (model, exe, port, core_num, allocation_size)
+    runner.run(concraft_cfg)
diff --git a/conmorfeusz/conmorfeusz/runner/__init__.py b/conmorfeusz/conmorfeusz/runner/__init__.py
new file mode 100644
index 0000000..32cb3cc
--- /dev/null
+++ b/conmorfeusz/conmorfeusz/runner/__init__.py
@@ -0,0 +1,9 @@
+import conmorfeusz.web as web
+import conmorfeusz.concraft as cc
+
+def run(concraft):
+    """
+    Glues all services together and starts the holistic app.
+    """
+    with cc.start_server(*concraft):
+        web.start()
\ No newline at end of file
diff --git a/conmorfeusz/conmorfeusz/service/analyzer.py b/conmorfeusz/conmorfeusz/service/analyzer.py
index d5a1bac..70a5f5a 100644
--- a/conmorfeusz/conmorfeusz/service/analyzer.py
+++ b/conmorfeusz/conmorfeusz/service/analyzer.py
@@ -1,5 +1,8 @@
 import morfeusz2
+from conmorfeusz import concraft
 
 def analyse(text):
     morf = morfeusz2.Morfeusz(expand_tags=True)
-    return morf.analyse(text)
\ No newline at end of file
+    conc = concraft.Concraft()
+    analysis = morf.analyse(text)
+    return conc.disamb(analysis)
\ No newline at end of file
diff --git a/conmorfeusz/default.nix b/conmorfeusz/default.nix
index 5cb8b82..134cdcc 100644
--- a/conmorfeusz/default.nix
+++ b/conmorfeusz/default.nix
@@ -1,9 +1,13 @@
 {
+  self,
+  system,
   pkgs,
   fetchurl,
   libgcc,
   stdenv,
   autoPatchelfHook,
+  makeWrapper,
+  defaultModel ? self.packages.${system}.concraft-pl-sgjp-model,
   ...
 }: let
   python = pkgs.python311; # Możesz zmienić wersję Python
@@ -43,10 +47,18 @@ in
     propagatedBuildInputs = with pythonPackages; [
       morfeusz2
       flask
+      requests
     ];
 
     nativeBuildInputs = with pythonPackages; [
+      makeWrapper
       setuptools
       wheel
     ];
+
+    postInstall = ''
+      wrapProgram $out/bin/conmorfeusz \
+        --set CONCRAFT_PL_MODEL "${defaultModel}" \
+        --set CONCRAFT_PL_BIN "${self.packages.${system}.concraft-pl}/bin/concraft-pl"
+    '';
   }
diff --git a/conmorfeusz/pyproject.toml b/conmorfeusz/pyproject.toml
index ea186a8..6790162 100644
--- a/conmorfeusz/pyproject.toml
+++ b/conmorfeusz/pyproject.toml
@@ -10,7 +10,8 @@ requires-python = "==3.6"
 
 dependencies = [
     "morfeusz2==1.99.12",
-    "flask"
+    "flask",
+    "requests"
 ]
 
 [tool.setuptools.packages.find]
diff --git a/flake.nix b/flake.nix
index 381f625..e97e008 100644
--- a/flake.nix
+++ b/flake.nix
@@ -16,9 +16,10 @@
       conmorfeusz = pkgs.callPackage ./conmorfeusz {inherit self;};
       morfeusz = pkgs.callPackage ./morfeusz {inherit self;};
       concraft-pl = pkgs.callPackage ./concraft-pl {inherit self;};
+      concraft-pl-sgjp-model = pkgs.callPackage ./concraft-pl-sgjp-model {inherit self;};
     in {
       packages = rec {
-        inherit conmorfeusz morfeusz concraft-pl;
+        inherit conmorfeusz morfeusz concraft-pl concraft-pl-sgjp-model;
         default = conmorfeusz;
       };
 