Add conmorfeusz as synergy of morfeusz and concraft-pl
This commit is contained in:
157
index.js
Executable file
157
index.js
Executable file
@@ -0,0 +1,157 @@
|
||||
const { spawnSync } = require("child_process");
|
||||
|
||||
/**
 * Runs the binary configured as MORFEUSZ_ANALYSER_BIN with `input` on its
 * standard input and parses the text it prints.
 *
 * Every non-blank output line must have the shape
 * `start,end,segment,lemma,tags,frequency,qualifiers`. A group of lines is
 * delimited by a leading `[` on its first line and a trailing `]` on its last
 * line (both may be on the same line).
 *
 * @param {string} input - Text handed to the analyser process.
 * @returns {Promise<Array<Array<object>>>} One array per `[`...`]` group; each
 *   entry holds the raw string fields (`start`, `end`, `segment`, `lemma`,
 *   `tags`, `frequency`, `qualifiers`) captured from one line.
 * @throws {Error} When the process cannot be started, terminates abnormally,
 *   prints a line that does not match the expected shape, or prints a line
 *   that is not inside any `[`...`]` group.
 */
async function analyse(input) {
  const { error, status, signal, stdout, stderr } = spawnSync(MORFEUSZ_ANALYSER_BIN, [], { input });

  // spawnSync does not throw on launch failure; it returns `error` and leaves
  // stdout null, so check explicitly instead of crashing on `null.toString()`.
  if (error) {
    throw new Error(`Failed to run Morfeusz analyser: ${error.message}`, { cause: error });
  }

  if (status !== 0) {
    throw new Error(
      `Morfeusz analyser exited abnormally (status: ${status}, signal: ${signal}): ${String(stderr).trim()}`
    );
  }

  const linePattern = /^\s?(?<opening>\[)?(?<start>\d+),(?<end>\d+),(?<segment>,|(.+?)),(?<lemma>,|(.+?)),(?<tags>[\w:.]+),(?<frequency>_|(.+?)),(?<qualifiers>_|(.+?))(?<ending>\])?$/;

  const output = [];
  // Group currently being collected; null while between groups.
  let segment = null;

  for (const line of stdout.toString().split(/\r?\n|\r/)) {
    if (line.trim().length === 0) {
      continue;
    }

    const match = line.match(linePattern);

    if (!match) {
      throw new Error(`Following line does not match the pattern: ${line}`);
    }

    const { opening, ending, ...data } = match.groups;

    if (opening) {
      segment = [];
    }

    if (segment === null) {
      throw new Error(`Following line is outside of any [...] segment: ${line}`);
    }

    segment.push(data);

    if (ending) {
      output.push(segment);
      segment = null;
    }
  }

  return output;
}
|
||||
|
||||
/**
 * Runs `CONCRAFT_BIN tag CONCRAFT_MODEL` as a child process, feeding `input`
 * to its standard input, and returns whatever it printed to standard output.
 *
 * @param {string} input - Text passed to the process on standard input.
 * @returns {Promise<string>} The process's standard output decoded as a string.
 * @throws {Error} When the process cannot be started or terminates abnormally;
 *   previously such failures surfaced as a TypeError or as an empty result.
 */
async function runConcraftLocally(input) {
  const { error, status, signal, stdout, stderr } = spawnSync(CONCRAFT_BIN, ["tag", CONCRAFT_MODEL], { input });

  // spawnSync reports launch problems through `error` rather than throwing.
  if (error) {
    throw new Error(`Failed to run Concraft: ${error.message}`, { cause: error });
  }

  if (status !== 0) {
    throw new Error(
      `Concraft exited abnormally (status: ${status}, signal: ${signal}): ${String(stderr).trim()}`
    );
  }

  return stdout.toString();
}
|
||||
|
||||
/**
 * Sends `dag` as JSON (`{ "dag": ... }`) in a POST request to
 * `${CONCRAFT_SERVER_URL}/parse` and returns the `dag` property of the JSON
 * response.
 *
 * @param {*} dag - Value placed under the `dag` key of the request body.
 * @returns {Promise<*>} The `dag` property of the parsed response body.
 * @throws {Error} When the server answers with a non-2xx status. Network
 *   failures and invalid JSON propagate from `fetch` / `response.json()`.
 */
async function invokeConcraftRemotely(dag) {
  const response = await fetch(`${CONCRAFT_SERVER_URL}/parse`, {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json',
    },
    body: JSON.stringify({ dag }),
  });

  // fetch resolves for HTTP error statuses too; do not treat an error page
  // as if it were a tagging result.
  if (!response.ok) {
    throw new Error(`Concraft server responded with HTTP ${response.status} ${response.statusText}`);
  }

  const data = await response.json();
  return data.dag;
}
|
||||
|
||||
/**
 * Converts grouped analysis entries into tab-separated rows, passes them to
 * the tagger selected by CONCRAFT_MODE (`'local'` uses runConcraftLocally,
 * anything else uses invokeConcraftRemotely) and parses the rows it returns.
 *
 * Outgoing rows contain the entry fields start, end, segment, lemma, tags,
 * frequency and qualifiers, followed by the fixed values `0.0`, `_`, `_`, `_`
 * (the prob, interp_meta, eos and seg_meta columns of the response layout).
 * In `tags`, each colon-separated part is reduced to the text before its
 * first dot.
 *
 * @param {Array<Array<object>>} analysis - Groups of entries whose fields are
 *   strings, as produced by `analyse`.
 * @returns {Promise<Array<object>>} One object per non-blank response row with
 *   `start`/`end` parsed as integers, `prob` as a float, `eos`/`disamb` as
 *   booleans and every `_` cell turned into an empty string.
 */
async function desambiguate(analysis) {
  // Columns are picked by name so the row layout does not depend on the
  // property insertion order (or on extra properties) of the entries.
  const inputColumns = ["start", "end", "segment", "lemma", "tags", "frequency", "qualifiers"];

  const input = analysis
    .flat()
    .map((entry) => ({
      ...entry,
      tags: entry.tags.split(":").map((part) => part.split(".")[0]).join(":"),
    }))
    .map((entry) => [...inputColumns.map((column) => entry[column]), "0.0", "_", "_", "_"].join("\t"))
    .join("\n");

  const tagger = CONCRAFT_MODE === 'local' ? runConcraftLocally : invokeConcraftRemotely;
  const response = await tagger(input);

  const keys = ["start", "end", "segment", "lemma", "tags", "frequency", "qualifiers", "prob", "interp_meta", "eos", "seg_meta", "disamb"];
  const output = [];

  for (const line of response.split(/\r?\n|\r/)) {
    if (line.trim().length === 0) {
      continue;
    }

    const properties = line.split(/\t/).map((prop, index) => [keys[index], prop === '_' ? '' : prop]);
    const element = Object.fromEntries(properties);

    element.start = Number.parseInt(element.start, 10);
    element.end = Number.parseInt(element.end, 10);
    element.disamb = element.disamb === 'disamb';
    element.eos = element.eos === 'eos';
    element.prob = Number.parseFloat(element.prob);

    output.push(element);
  }

  return output;
}
|
||||
|
||||
/**
 * Analyses `input` and then disambiguates the result: the value produced by
 * `analyse` is passed straight to `desambiguate`.
 *
 * @param {string} input - Value forwarded to `analyse`.
 * @returns {Promise<*>} Whatever `desambiguate` resolves to.
 */
async function danalyse(input) {
  return desambiguate(await analyse(input));
}
|
||||
|
||||
/**
 * Runs the binary configured as MORFEUSZ_GENERATOR_BIN with `text` on its
 * standard input, parses the printed lines and returns the entries whose
 * `tags` string contains every requested tag as a substring.
 *
 * Every non-blank output line must have the shape
 * `segment,lemma,tags,frequency,qualifiers`; a group of lines starts with `[`
 * and ends with `]`.
 *
 * @param {string} text - Text handed to the generator process.
 * @param {...string} tags - Substrings that must all occur in an entry's
 *   `tags` field; with no tags every entry is returned.
 * @returns {Array<object>} Flat list of matching entries with the raw string
 *   fields `segment`, `lemma`, `tags`, `frequency`, `qualifiers`.
 * @throws {Error} When the process cannot be started, terminates abnormally,
 *   prints a line that does not match the expected shape, or prints a line
 *   that is not inside any `[`...`]` group.
 */
function generate(text, ...tags) {
  // The original passed an undefined identifier `input` here, which made every
  // call fail with a ReferenceError; the text to send is the `text` parameter.
  const { error, status, signal, stdout, stderr } = spawnSync(MORFEUSZ_GENERATOR_BIN, [], { input: text });

  // spawnSync does not throw on launch failure; it returns `error` instead.
  if (error) {
    throw new Error(`Failed to run Morfeusz generator: ${error.message}`, { cause: error });
  }

  if (status !== 0) {
    throw new Error(
      `Morfeusz generator exited abnormally (status: ${status}, signal: ${signal}): ${String(stderr).trim()}`
    );
  }

  const linePattern = /^\s?(?<opening>\[)?(?<segment>,|(.+?)),(?<lemma>,|(.+?)),(?<tags>[\w:.]+),(?<frequency>_|(.+?)),(?<qualifiers>_|(.+?))(?<ending>\])?$/;

  const output = [];
  // Group currently being collected; null while between groups.
  let segment = null;

  for (const line of stdout.toString().split(/\r?\n|\r/)) {
    if (line.trim().length === 0) {
      continue;
    }

    const match = line.match(linePattern);

    if (!match) {
      throw new Error(`Following line does not match the pattern: ${line}`);
    }

    const { opening, ending, ...data } = match.groups;

    if (opening) {
      segment = [];
    }

    if (segment === null) {
      throw new Error(`Following line is outside of any [...] segment: ${line}`);
    }

    segment.push(data);

    if (ending) {
      output.push(segment);
      segment = null;
    }
  }

  return output.flat().filter((entry) => tags.every((tag) => entry.tags.includes(tag)));
}
|
||||
|
||||
/**
 * Command-line dispatcher: picks the action named by the third element of the
 * argument vector, calls it with the remaining elements and prints the result
 * as pretty-printed JSON (2-space indent) on standard output.
 *
 * @param {string[]} argv - Argument vector laid out as
 *   `[interpreter, script, action, ...args]`; `action` must be one of
 *   `analyse`, `danalyse` or `generate`.
 * @returns {Promise<void>}
 * @throws {Error} When `action` is not one of the supported names.
 */
async function main([ interpreter, script, action, ...args ]) {
  const actions = {
    analyse,
    danalyse,
    generate,
  };

  // Own-property check: a plain `actions[action]` lookup would also resolve
  // inherited names such as "constructor" or "toString" and try to run them.
  if (!Object.hasOwn(actions, action)) {
    throw new Error(`Unknown action: '${action}'`);
  }

  const output = await actions[action](...args);

  console.log(JSON.stringify(output, undefined, 2));
}
|
||||
|
||||
|
||||
// Script entry point. `main` is async, so failures arrive as a rejected
// promise; report them and signal failure through the exit code instead of
// leaving the promise unhandled.
main(process.argv).catch((error) => {
  console.error(error);
  process.exitCode = 1;
});
|
||||
Reference in New Issue
Block a user