Connect Morfeusz and Concraft
This commit is contained in:
5
concraft-pl-sgjp-model/default.nix
Normal file
5
concraft-pl-sgjp-model/default.nix
Normal file
@@ -0,0 +1,5 @@
|
|||||||
|
# Fetches the Concraft-pl SGJP model file named in the URL below as a
# fixed-output derivation (the result is the downloaded .gz file itself).
{fetchurl, ...}:
fetchurl {
  # Without an explicit name, fetchurl uses the last path component of the
  # URL, which here includes the query string ("Concraft?action=...&do=...").
  # '&' is not an allowed character in a store path name, so name the output
  # explicitly. The hash is computed over the file contents and is unaffected.
  name = "concraft-pl-model-SGJP-20220221.gz";
  url = "https://zil.ipipan.waw.pl/Concraft?action=AttachFile&do=get&target=concraft-pl-model-SGJP-20220221.gz";
  hash = "sha256-VcvdSkJwUhAgHroA0d/bH3QDjjO/2x8HqSuUvRgIN/4=";
}
|
||||||
126
conmorfeusz/conmorfeusz/concraft/__init__.py
Normal file
126
conmorfeusz/conmorfeusz/concraft/__init__.py
Normal file
@@ -0,0 +1,126 @@
|
|||||||
|
#! /usr/bin/python
|
||||||
|
# *-* coding: utf-8 *-*
|
||||||
|
#
|
||||||
|
# Authors:
|
||||||
|
# * Witek Kieraś
|
||||||
|
# * Kuba Waszczuk
|
||||||
|
|
||||||
|
|
||||||
|
import requests
|
||||||
|
import json
|
||||||
|
import time
|
||||||
|
from subprocess import Popen, PIPE
|
||||||
|
from contextlib import contextmanager
|
||||||
|
|
||||||
|
@contextmanager
def start_server(*args):
    """
    Run a Concraft-pl `Server` for the duration of a `with` block.

    All positional arguments are forwarded unchanged to `Server`. The
    created server object is the value bound by the `with` statement, and
    its `terminate()` method is called when the block is left, whether it
    finishes normally or raises.
    """
    concraft_server = Server(*args)
    try:
        yield concraft_server
    finally:
        # Runs on normal exit and on exceptions alike.
        concraft_server.terminate()
|
||||||
|
|
||||||
|
class Concraft(object):
    """Client for the `/parse` HTTP endpoint of a Concraft-pl server."""

    def __init__(self, server_addr='http://localhost', port=3000):
        """
        Parameters
        ----------
        server_addr : url
            Address of the Concraft-pl server
        port : int
            Port number used by the Concraft-pl server
        """
        self.server_addr = server_addr + ":{}/parse".format(port)

    def dag_to_str(self, morf_dag):
        """
        Convert a DAG in the Morfeusz-compliant format to a DAG in the
        Concraft-compliant format.

        Each item of `morf_dag` must unpack as
        ``(num1, num2, (forma, lemat, tag, posp, kwal))`` where `posp` and
        `kwal` are iterables of strings. Every item becomes one
        tab-separated, newline-terminated line of 11 columns; the last four
        columns are always ``'0.0'`` followed by three empty strings.
        """
        rows = []
        for num1, num2, (forma, lemat, tag, posp, kwal) in morf_dag:
            columns = (
                str(num1), str(num2), forma, lemat, tag,
                ','.join(posp), ','.join(kwal),
                '0.0', '', '', '',
            )
            rows.append('\t'.join(columns) + '\n')
        # Join once instead of growing a string inside the loop.
        return ''.join(rows)

    def str_to_dag(self, dag_str):
        """
        Reverse of `dag_to_str`.

        Every non-empty line of `dag_str` must hold exactly 12 tab-separated
        columns and is turned into
        ``(int(num1), int(num2), (forma, lemat, tag, posp, kwal), prob,
        eos, disamb)``, where `posp`/`kwal` are lists (empty when the column
        is empty), `prob` is kept as a string, and `eos`/`disamb` are the
        strings ``'eos'``/``'disamb'`` when their column is non-empty and
        ``None`` otherwise. Every empty line (including the one produced by
        a trailing newline) is represented by an empty string in the result.

        Raises
        ------
        ValueError
            If a non-empty line does not have exactly 12 columns or its
            first two columns are not integers.
        """
        analyse_list = []
        for line in dag_str.split('\n'):
            if not line:
                analyse_list.append("")
                continue
            # `interp_meta` and `seg_meta` are parsed but not returned.
            (num1, num2, forma, lemat, tag, posp, kwal, prob,
             interp_meta, eos, seg_meta, disamb) = line.split('\t')
            analyse_list.append((
                int(num1),
                int(num2),
                (forma, lemat, tag,
                 posp.split(',') if posp else [],
                 kwal.split(',') if kwal else []),
                prob,
                'eos' if eos else None,
                'disamb' if disamb else None,
            ))
        return analyse_list

    def disamb_str(self, dag):
        """
        Disambiguate a DAG represented as a string in the Concraft-compliant
        format (tab separated string with one arc represented per line).

        The DAG (with one extra newline appended) is POSTed as the ``'dag'``
        field of a JSON object to the server; the ``'dag'`` field of the
        JSON response is returned.

        Raises
        ------
        requests.HTTPError
            If the server answers with an HTTP error status.
        """
        # TODO: only add '\n' if necessary!
        request_data = {'dag': dag + '\n'}
        r = requests.post(self.server_addr, data=json.dumps(request_data))
        # Surface server-side failures as HTTP errors rather than as a
        # confusing JSON-decoding or KeyError failure below.
        r.raise_for_status()
        return r.json()['dag']

    def disamb(self, dag):
        """
        Disambiguate a DAG represented in the Morfeusz-compliant format.

        Returns the result in the format produced by `str_to_dag`.
        """
        dag_str = self.dag_to_str(dag)
        dag_result = self.disamb_str(dag_str)
        return self.str_to_dag(dag_result)
|
||||||
|
|
||||||
|
|
||||||
|
class Server(object):
    """Handle on a Concraft-pl server running as a child process."""

    def __init__(self, model_path, concraft_path="concraft-pl", port=3000,
                 core_num=1, allocation_size=64):
        """
        Start a Concraft-pl server instance in the background.

        The constructor blocks until the server accepts an HTTP request on
        ``http://localhost:<port>/parse``, polling once per second.

        Parameters
        ----------
        model_path : path
            Path to a Concraft-pl model
        concraft_path : path
            Path to a Concraft-pl executable
        port : int
            Port number to be used to run a Concraft-pl server instance
        core_num : int
            Number of processor cores to use
        allocation_size : int
            Allocation area size (in MBs) of the garbage collector

        Raises
        ------
        RuntimeError
            If the server process exits before it starts accepting
            connections (e.g. wrong model path).
        """
        # Local import: the file's top-level import only brings in
        # Popen and PIPE.
        from subprocess import DEVNULL

        self.port = port
        # stdout/stderr are never read by this class; sending them to a pipe
        # would block the server once the OS pipe buffer fills up, so they
        # are discarded instead.
        self.concraft_server = Popen(
            [concraft_path, 'server',
             '--port={}'.format(port), '-i', model_path, '+RTS',
             '-N{}'.format(core_num), '-A{}M'.format(allocation_size)],
            stdin=PIPE, stdout=DEVNULL, stderr=DEVNULL)

        probe_url = 'http://localhost:{}/parse'.format(port)
        probe_body = json.dumps({'dag': ''})
        while True:
            try:
                # An empty DAG is used purely as a readiness probe; the
                # response itself is ignored.
                requests.post(probe_url, data=probe_body)
            except requests.ConnectionError:
                exit_code = self.concraft_server.poll()
                if exit_code is not None:
                    # The child is gone, so waiting longer would loop
                    # forever; release the pipe and report the failure.
                    self.concraft_server.stdin.close()
                    raise RuntimeError(
                        'Concraft-pl server exited with code {} before '
                        'accepting connections'.format(exit_code))
                time.sleep(1)
            else:
                break

    def terminate(self):
        """
        Terminate the Concraft-pl server.

        Sends the termination signal, waits up to 10 seconds for the process
        to exit (killing it if it does not), and closes the stdin pipe, so
        that no zombie process or open pipe is left behind.
        """
        from subprocess import TimeoutExpired

        self.concraft_server.terminate()
        try:
            self.concraft_server.wait(timeout=10)
        except TimeoutExpired:
            self.concraft_server.kill()
            self.concraft_server.wait()
        if self.concraft_server.stdin is not None:
            self.concraft_server.stdin.close()
|
||||||
@@ -1,4 +1,14 @@
|
|||||||
import conmorfeusz.web as web
|
import os
|
||||||
|
from . import runner
|
||||||
|
|
||||||
def main():
    """
    Entry point: read the Concraft-pl configuration and start the app.

    Environment variables
    ---------------------
    CONCRAFT_PL_MODEL
        Path to the Concraft-pl model (required).
    CONCRAFT_PL_BIN
        Path to the Concraft-pl executable; falls back to ``concraft-pl``
        when unset or empty.

    Raises
    ------
    SystemExit
        If CONCRAFT_PL_MODEL is unset or empty.
    """
    # A missing variable would otherwise be forwarded as None and only fail
    # later, when the server process is spawned, with an unrelated-looking
    # error.
    exe = os.environ.get('CONCRAFT_PL_BIN') or 'concraft-pl'
    model = os.environ.get('CONCRAFT_PL_MODEL')
    if not model:
        raise SystemExit(
            'CONCRAFT_PL_MODEL is not set: it must point to a Concraft-pl '
            'model file')
    port = 3000
    core_num = 1
    allocation_size = 64

    # Positional order expected by the runner:
    # (model, executable, port, cores, allocation size).
    concraft_cfg = (model, exe, port, core_num, allocation_size)
    runner.run(concraft_cfg)
|
||||||
|
|
||||||
|
|
||||||
9
conmorfeusz/conmorfeusz/runner/__init__.py
Normal file
9
conmorfeusz/conmorfeusz/runner/__init__.py
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
import conmorfeusz.web as web
|
||||||
|
import conmorfeusz.concraft as cc
|
||||||
|
|
||||||
|
def run(concraft):
    """
    Start every service and run the complete application.

    `concraft` is an iterable of positional arguments that is unpacked
    into `cc.start_server`; the web front end is started while that
    server context is active.
    """
    server_context = cc.start_server(*concraft)
    with server_context:
        web.start()
|
||||||
@@ -1,5 +1,8 @@
|
|||||||
import morfeusz2
|
import morfeusz2
|
||||||
|
from conmorfeusz import concraft
|
||||||
|
|
||||||
def analyse(text):
    """
    Analyse `text` with Morfeusz and disambiguate the result with Concraft.

    A Morfeusz analyser (with `expand_tags=True`) and a `Concraft` client
    with default settings are created on every call; the Morfeusz analysis
    of `text` is passed to `Concraft.disamb` and its result is returned.
    """
    analyser = morfeusz2.Morfeusz(expand_tags=True)
    tagger = concraft.Concraft()
    return tagger.disamb(analyser.analyse(text))
|
||||||
@@ -1,9 +1,13 @@
|
|||||||
{
|
{
|
||||||
|
self,
|
||||||
|
system,
|
||||||
pkgs,
|
pkgs,
|
||||||
fetchurl,
|
fetchurl,
|
||||||
libgcc,
|
libgcc,
|
||||||
stdenv,
|
stdenv,
|
||||||
autoPatchelfHook,
|
autoPatchelfHook,
|
||||||
|
makeWrapper,
|
||||||
|
defaultModel ? self.packages.${system}.concraft-pl-sgjp-model,
|
||||||
...
|
...
|
||||||
}: let
|
}: let
|
||||||
python = pkgs.python311; # Możesz zmienić wersję Python
|
python = pkgs.python311; # Możesz zmienić wersję Python
|
||||||
@@ -43,10 +47,18 @@ in
|
|||||||
propagatedBuildInputs = with pythonPackages; [
|
propagatedBuildInputs = with pythonPackages; [
|
||||||
morfeusz2
|
morfeusz2
|
||||||
flask
|
flask
|
||||||
|
requests
|
||||||
];
|
];
|
||||||
|
|
||||||
nativeBuildInputs = with pythonPackages; [
|
nativeBuildInputs = with pythonPackages; [
|
||||||
|
makeWrapper
|
||||||
setuptools
|
setuptools
|
||||||
wheel
|
wheel
|
||||||
];
|
];
|
||||||
|
|
||||||
|
postInstall = ''
|
||||||
|
wrapProgram $out/bin/conmorfeusz \
|
||||||
|
--set CONCRAFT_PL_MODEL "${defaultModel}" \
|
||||||
|
--set CONCRAFT_PL_BIN "${self.packages.${system}.concraft-pl}/bin/concraft-pl"
|
||||||
|
'';
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -10,7 +10,8 @@ requires-python = "==3.6"
|
|||||||
|
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"morfeusz2==1.99.12",
|
"morfeusz2==1.99.12",
|
||||||
"flask"
|
"flask",
|
||||||
|
"requests"
|
||||||
]
|
]
|
||||||
|
|
||||||
[tool.setuptools.packages.find]
|
[tool.setuptools.packages.find]
|
||||||
|
|||||||
@@ -16,9 +16,10 @@
|
|||||||
conmorfeusz = pkgs.callPackage ./conmorfeusz {inherit self;};
|
conmorfeusz = pkgs.callPackage ./conmorfeusz {inherit self;};
|
||||||
morfeusz = pkgs.callPackage ./morfeusz {inherit self;};
|
morfeusz = pkgs.callPackage ./morfeusz {inherit self;};
|
||||||
concraft-pl = pkgs.callPackage ./concraft-pl {inherit self;};
|
concraft-pl = pkgs.callPackage ./concraft-pl {inherit self;};
|
||||||
|
concraft-pl-sgjp-model = pkgs.callPackage ./concraft-pl-sgjp-model {inherit self;};
|
||||||
in {
|
in {
|
||||||
packages = rec {
|
packages = rec {
|
||||||
inherit conmorfeusz morfeusz concraft-pl;
|
inherit conmorfeusz morfeusz concraft-pl concraft-pl-sgjp-model;
|
||||||
default = conmorfeusz;
|
default = conmorfeusz;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user