Connect Morfeusz and Concraft

This commit is contained in:
2026-01-09 13:17:44 +01:00
parent fc86a44755
commit 9fe538014e
8 changed files with 172 additions and 5 deletions

View File

@@ -0,0 +1,5 @@
# Fetches the gzipped Concraft-pl model file
# `concraft-pl-model-SGJP-20220221.gz` from zil.ipipan.waw.pl.
{fetchurl, ...}:
fetchurl {
# Attachment-download URL; the actual file name is carried in the `target`
# query parameter rather than in the URL path.
url = "https://zil.ipipan.waw.pl/Concraft?action=AttachFile&do=get&target=concraft-pl-model-SGJP-20220221.gz";
# sha256 hash (SRI notation) pinning the expected content of the download.
hash = "sha256-VcvdSkJwUhAgHroA0d/bH3QDjjO/2x8HqSuUvRgIN/4=";
}

View File

@@ -0,0 +1,126 @@
#! /usr/bin/python
# *-* coding: utf-8 *-*
#
# Authors:
# * Witek Kieraś
# * Kuba Waszczuk
import requests
import json
import time
from subprocess import Popen, PIPE
from contextlib import contextmanager
@contextmanager
def start_server(*args, **kwargs):
    """
    Run a Concraft-pl server for the duration of a 'with' statement.

    All positional and keyword arguments are forwarded unchanged to
    `Server`, so options such as `port` or `core_num` may be given by name.
    The started `Server` is the value bound by the 'as' clause; it is
    terminated when the block is left, whether normally or by an exception.
    """
    server = Server(*args, **kwargs)
    try:
        yield server
    finally:
        # Always stop the background process, even if the body raised.
        server.terminate()
class Concraft(object):
    """
    Client for a running Concraft-pl HTTP server.

    Converts Morfeusz-style DAGs to the tab-separated text format the
    server consumes, posts them to the server's `/parse` endpoint and
    converts the answer back.
    """

    def __init__(self, server_addr='http://localhost', port=3000):
        """
        Parameters
        ----------
        server_addr : url
            Address of the Concraft-pl server
        port : int
            Port number used by the Concraft-pl server
        """
        self.server_addr = server_addr + ":{}/parse".format(port)

    def dag_to_str(self, morf_dag):
        """
        Convert a DAG in the Morfeusz-compliant format to a DAG in the
        Concraft-compliant format.

        Each item of `morf_dag` is `(num1, num2, (forma, lemat, tag, posp,
        kwal))`, where `posp` and `kwal` are sequences of strings.  Every
        item becomes one newline-terminated line of eleven tab-separated
        columns; the last four are the constant '0.0' followed by three
        empty columns.  An empty DAG yields an empty string.
        """
        lines = []
        for num1, num2, (forma, lemat, tag, posp, kwal) in morf_dag:
            columns = (
                str(num1), str(num2), forma, lemat, tag,
                ','.join(posp), ','.join(kwal),
                '0.0', '', '', '',
            )
            lines.append('\t'.join(columns) + '\n')
        # Join once instead of growing a string inside the loop.
        return ''.join(lines)

    def str_to_dag(self, dag_str):
        """
        Reverse of `dag_to_str`.

        Every non-empty line of `dag_str` must hold exactly twelve
        tab-separated columns and becomes a tuple
        `(num1, num2, (forma, lemat, tag, posp, kwal), prob, eos, disamb)`,
        where `eos` is 'eos' or None and `disamb` is 'disamb' or None
        depending on whether the corresponding column is non-empty.
        Every empty line (including the one after a trailing newline) is
        kept in the result as an empty string.

        Raises
        ------
        ValueError
            If a non-empty line does not have twelve columns or its node
            numbers are not integers.
        """
        analyse_list = []
        for line in dag_str.split('\n'):
            if not line:
                analyse_list.append("")
                continue
            (num1, num2, forma, lemat, tag, posp, kwal, prob,
             _interp_meta, eos, _seg_meta, disamb) = line.split('\t')
            eos = 'eos' if eos else None
            disamb = 'disamb' if disamb else None
            posp = posp.split(',') if posp else []
            kwal = kwal.split(',') if kwal else []
            analyse_list.append(
                (int(num1), int(num2), (forma, lemat, tag, posp, kwal),
                 prob, eos, disamb))
        return analyse_list

    def disamb_str(self, dag):
        """
        Disambiguate a DAG represented as a string in the Concraft-compliant
        format (tab separated string with one arc represented per line).

        Returns the value stored under the 'dag' key of the server's JSON
        reply.

        Raises
        ------
        requests.HTTPError
            If the server answers with an error status code.
        """
        # TODO: only add '\n' if necessary!
        request_data = {'dag': dag + '\n'}
        r = requests.post(self.server_addr, data=json.dumps(request_data))
        # Surface server-side failures directly instead of as a confusing
        # JSON-decoding error or KeyError below.
        r.raise_for_status()
        return r.json()['dag']

    def disamb(self, dag):
        """
        Disambiguate a DAG represented in the Morfeusz-compliant format.

        The result has the shape produced by `str_to_dag`.
        """
        dag_str = self.dag_to_str(dag)
        dag_result = self.disamb_str(dag_str)
        return self.str_to_dag(dag_result)
class Server(object):
    """Handle on a Concraft-pl server process running in the background."""

    def __init__(self, model_path, concraft_path="concraft-pl", port=3000,
                 core_num=1, allocation_size=64):
        """
        Start a Concraft-pl server instance in the background.

        The constructor blocks until the server accepts HTTP connections
        on `http://localhost:<port>/parse`.

        Parameters
        ----------
        model_path : path
            Path to a Concraft-pl model
        concraft_path : path
            Path to a Concraft-pl executable
        port : int
            Port number to be used to run a Concraft-pl server instance
        core_num : int
            Number of processor cores to use
        allocation_size : int
            Allocation area size (in MBs) of the garbage collector

        Raises
        ------
        RuntimeError
            If the server process exits before it starts accepting
            connections (for example because the model cannot be loaded).
        """
        self.port = port
        self.concraft_server = Popen(
            [
                concraft_path, 'server',
                '--port={}'.format(port), '-i', model_path, '+RTS',
                '-N{}'.format(core_num), '-A{}M'.format(allocation_size),
            ],
            stdin=PIPE, stdout=PIPE, stderr=PIPE)
        try:
            self._wait_until_ready()
        except BaseException:
            # Do not leave an orphaned child behind when start-up fails or
            # is interrupted (e.g. by Ctrl-C while the model is loading).
            self.terminate()
            raise

    def _wait_until_ready(self):
        """Poll the server once per second until it accepts a connection."""
        probe_url = 'http://localhost:{}/parse'.format(self.port)
        probe_body = json.dumps({'dag': ''})
        while True:
            exit_code = self.concraft_server.poll()
            if exit_code is not None:
                # The child is gone, so no connection will ever succeed;
                # report its stderr instead of looping forever.
                _, err = self.concraft_server.communicate()
                raise RuntimeError(
                    "Concraft-pl server exited with code {} before "
                    "accepting connections: {}".format(
                        exit_code, err.decode('utf-8', 'replace').strip()))
            try:
                # Only connectivity matters here; the reply is ignored.
                requests.post(probe_url, data=probe_body)
            except requests.ConnectionError:
                time.sleep(1)
            else:
                return

    def terminate(self):
        """
        Terminate the Concraft-pl server.

        Waits for the process to exit so that it is reaped, escalating to
        a kill if it has not stopped within ten seconds, and closes the
        pipes opened for it.  Calling this method more than once is safe.
        """
        from subprocess import TimeoutExpired

        proc = self.concraft_server
        if proc.poll() is None:
            proc.terminate()
        try:
            proc.wait(timeout=10)
        except TimeoutExpired:
            proc.kill()
            proc.wait()
        for stream in (proc.stdin, proc.stdout, proc.stderr):
            if stream is not None:
                stream.close()

View File

@@ -1,4 +1,14 @@
import conmorfeusz.web as web import os
from . import runner
def main(): def main():
web.start() exe = os.environ.get('CONCRAFT_PL_BIN')
model = os.environ.get('CONCRAFT_PL_MODEL')
port=3000
core_num=1
allocation_size=64
concraft_cfg = (model, exe, port, core_num, allocation_size)
runner.run(concraft_cfg)

View File

@@ -0,0 +1,9 @@
import conmorfeusz.web as web
import conmorfeusz.concraft as cc
def run(concraft):
    """
    Start the whole application: Concraft-pl server plus web front end.

    `concraft` is an iterable of positional arguments that is unpacked
    into `cc.start_server`.  The web application is started while the
    server context is active, so the server is shut down once
    `web.start()` returns or raises.
    """
    managed_server = cc.start_server(*concraft)
    with managed_server:
        web.start()

View File

@@ -1,5 +1,8 @@
import morfeusz2 import morfeusz2
from conmorfeusz import concraft
def analyse(text): def analyse(text):
morf = morfeusz2.Morfeusz(expand_tags=True) morf = morfeusz2.Morfeusz(expand_tags=True)
return morf.analyse(text) conc = concraft.Concraft()
analysis = morf.analyse(text)
return conc.disamb(analysis)

View File

@@ -1,9 +1,13 @@
{ {
self,
system,
pkgs, pkgs,
fetchurl, fetchurl,
libgcc, libgcc,
stdenv, stdenv,
autoPatchelfHook, autoPatchelfHook,
makeWrapper,
defaultModel ? self.packages.${system}.concraft-pl-sgjp-model,
... ...
}: let }: let
python = pkgs.python311; # Możesz zmienić wersję Python python = pkgs.python311; # Możesz zmienić wersję Python
@@ -43,10 +47,18 @@ in
propagatedBuildInputs = with pythonPackages; [ propagatedBuildInputs = with pythonPackages; [
morfeusz2 morfeusz2
flask flask
requests
]; ];
nativeBuildInputs = with pythonPackages; [ nativeBuildInputs = with pythonPackages; [
makeWrapper
setuptools setuptools
wheel wheel
]; ];
postInstall = ''
wrapProgram $out/bin/conmorfeusz \
--set CONCRAFT_PL_MODEL "${defaultModel}" \
--set CONCRAFT_PL_BIN "${self.packages.${system}.concraft-pl}/bin/concraft-pl"
'';
} }

View File

@@ -10,7 +10,8 @@ requires-python = "==3.6"
dependencies = [ dependencies = [
"morfeusz2==1.99.12", "morfeusz2==1.99.12",
"flask" "flask",
"requests"
] ]
[tool.setuptools.packages.find] [tool.setuptools.packages.find]

View File

@@ -16,9 +16,10 @@
conmorfeusz = pkgs.callPackage ./conmorfeusz {inherit self;}; conmorfeusz = pkgs.callPackage ./conmorfeusz {inherit self;};
morfeusz = pkgs.callPackage ./morfeusz {inherit self;}; morfeusz = pkgs.callPackage ./morfeusz {inherit self;};
concraft-pl = pkgs.callPackage ./concraft-pl {inherit self;}; concraft-pl = pkgs.callPackage ./concraft-pl {inherit self;};
concraft-pl-sgjp-model = pkgs.callPackage ./concraft-pl-sgjp-model {inherit self;};
in { in {
packages = rec { packages = rec {
inherit conmorfeusz morfeusz concraft-pl; inherit conmorfeusz morfeusz concraft-pl concraft-pl-sgjp-model;
default = conmorfeusz; default = conmorfeusz;
}; };