const { spawnSync } = require("child_process");

// NOTE(review): MORFEUSZ_ANALYSER_BIN, MORFEUSZ_GENERATOR_BIN, CONCRAFT_BIN,
// CONCRAFT_MODEL, CONCRAFT_MODE and CONCRAFT_SERVER_URL are referenced below
// but are not defined in the visible source; they are assumed to be provided
// elsewhere — confirm before running.

/** Matches any of the \n, \r\n and \r line terminators. */
const LINE_BREAK = /\r?\n|\r/;

/**
 * One line of analyser output: `[start,end,segment,lemma,tags,frequency,qualifiers]`.
 * The brackets are optional because only the first / last line of a segment
 * carries them. Group order matters: it defines the property order of the
 * parsed entries.
 */
const ANALYSER_LINE_PATTERN =
  /^\s?(?<opening>\[)?(?<start>\d+),(?<end>\d+),(?<segment>,|(.+?)),(?<lemma>,|(.+?)),(?<tags>[\w:.]+),(?<frequency>_|(.+?)),(?<qualifiers>_|(.+?))(?<ending>\])?$/;

/** One line of generator output; same as the analyser line without start/end. */
const GENERATOR_LINE_PATTERN =
  /^\s?(?<opening>\[)?(?<segment>,|(.+?)),(?<lemma>,|(.+?)),(?<tags>[\w:.]+),(?<frequency>_|(.+?)),(?<qualifiers>_|(.+?))(?<ending>\])?$/;

/** Column names of one tab-separated line returned by Concraft, in order. */
const CONCRAFT_COLUMNS = [
  "start", "end", "segment", "lemma", "tags", "frequency", "qualifiers",
  "prob", "interp_meta", "eos", "seg_meta", "disamb",
];

/**
 * Names of the positional attributes that follow the type in a tag string,
 * keyed by tag type. Types not listed here are parsed to `{ type }` only.
 */
const TAG_ATTRIBUTES = Object.freeze({
  adv: ["degree"],
  imps: ["aspect"],
  inf: ["aspect"],
  pant: ["aspect"],
  pcon: ["aspect"],
  qub: ["vocalicity"],
  prep: ["case", "vocalicity"],
  siebie: ["case"],
  subst: ["number", "case", "gender"],
  depr: ["number", "case", "gender"],
  ger: ["number", "case", "gender", "aspect", "negation"],
  ppron12: ["number", "case", "gender", "person", "accentability"],
  ppron3: ["number", "case", "gender", "person", "accentability", "postprepositionality"],
  num: ["number", "case", "gender", "accommodability"],
  numcol: ["number", "case", "gender", "accommodability"],
  adj: ["number", "case", "gender", "degree"],
  pact: ["number", "case", "gender", "aspect", "negation"],
  ppas: ["number", "case", "gender", "aspect", "negation"],
  winien: ["number", "gender", "aspect"],
  praet: ["number", "gender", "aspect", "agglutination"],
  bedzie: ["number", "person", "aspect"],
  fin: ["number", "person", "aspect"],
  impt: ["number", "person", "aspect"],
  aglt: ["number", "person", "aspect", "vocalicity"],
});

/**
 * Runs an external binary synchronously, feeding `input` on stdin.
 *
 * @param {string} bin - Executable to run.
 * @param {string[]} args - Command-line arguments.
 * @param {string} input - Data written to the process' stdin.
 * @returns {object} The `spawnSync` result (`stdout`, `stderr`, ...).
 * @throws {Error} When the process could not be spawned at all; without this
 *   check `stdout` would be null and fail later with an opaque TypeError.
 */
function runBinary(bin, args, input) {
  const result = spawnSync(bin, args, { input });
  if (result.error) {
    throw new Error(`Failed to run '${bin}'`, { cause: result.error });
  }
  return result;
}

/**
 * Parses bracket-delimited, line-oriented output into a list of segments.
 * A segment starts on a line matching the `opening` group and ends on a line
 * matching the `ending` group; every line in between contributes one entry.
 * Blank lines are skipped. A segment that is never closed is not returned.
 *
 * @param {string} text - Raw output to parse.
 * @param {RegExp} linePattern - Pattern with `opening`/`ending` named groups.
 * @param {(data: object) => object} toEntry - Maps the remaining named groups
 *   of a line to the entry stored in the segment.
 * @returns {object[][]} Segments, each a list of entries.
 * @throws {Error} When a line does not match `linePattern` or appears outside
 *   of an open segment.
 */
function parseSegments(text, linePattern, toEntry) {
  const segments = [];
  let current;
  for (const line of text.split(LINE_BREAK)) {
    if (line.trim().length === 0) {
      continue;
    }
    const match = line.match(linePattern);
    if (!match) {
      throw new Error(`Following line does not match the pattern: ${line}`);
    }
    const { opening, ending, ...data } = match.groups;
    if (opening) {
      current = [];
    }
    if (!current) {
      throw new Error(`Following line is outside of any segment: ${line}`);
    }
    current.push(toEntry(data));
    if (ending) {
      segments.push(current);
      current = undefined;
    }
  }
  return segments;
}

/**
 * Turns the raw groups of an analyser line into an analysis entry: `tags` is
 * replaced by its parsed form and the raw tag string is kept as `rtags`.
 */
function toAnalysisEntry(data) {
  return { ...data, tags: parseTags(data.tags), rtags: data.tags };
}

/**
 * Runs the analyser binary on `input` and parses its output.
 *
 * @param {string} input - Text passed to the analyser on stdin.
 * @returns {Promise<object[][]>} Segments of analysis entries; `start` and
 *   `end` are kept as strings.
 * @throws {Error} When the binary cannot be run or its output is malformed.
 */
async function analyse(input) {
  const { stdout } = runBinary(MORFEUSZ_ANALYSER_BIN, [], input);
  return parseSegments(stdout.toString(), ANALYSER_LINE_PATTERN, toAnalysisEntry);
}

/**
 * Tags `input` with the local Concraft binary (`tag <model>`).
 *
 * @param {string} input - Tab-separated lines as built by `toConcraftInput`.
 * @returns {Promise<string>} Raw stdout of the tagger.
 * @throws {Error} When the binary cannot be run or writes anything
 *   non-blank to stderr.
 */
async function runConcraftLocally(input) {
  const { stdout, stderr } = runBinary(CONCRAFT_BIN, ["tag", CONCRAFT_MODEL], input);
  const error = stderr.toString();
  if (error.trim().length > 0) {
    throw new Error(error);
  }
  return stdout.toString();
}

/**
 * Tags `dag` by POSTing it as JSON to `${CONCRAFT_SERVER_URL}/parse`.
 *
 * @param {string} dag - Tab-separated lines as built by `toConcraftInput`.
 * @returns {Promise<string>} The `dag` property of the JSON response.
 * @throws {Error} When the server answers with a non-2xx status.
 */
async function invokeConcraftRemotely(dag) {
  const response = await fetch(`${CONCRAFT_SERVER_URL}/parse`, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({ dag }),
  });
  if (!response.ok) {
    throw new Error(`Concraft server responded with ${response.status} ${response.statusText}`);
  }
  const data = await response.json();
  return data.dag;
}

/**
 * Serialises analysis segments into the tab-separated text sent to Concraft:
 * one line per entry with the seven analysis columns followed by the
 * placeholder columns "0.0", "_", "_", "_".
 *
 * The tag column is rebuilt from the parsed tag object, keeping only the first
 * of any dot-separated alternatives. Attributes that were absent in the
 * original tag (parsed as `undefined`) are skipped instead of crashing.
 * NOTE(review): attributes that `parseTags` does not know about are therefore
 * not sent — confirm this is intended.
 *
 * @param {object[][]} analysis - Segments as returned by `analyse`.
 * @returns {string} Newline-joined lines.
 */
function toConcraftInput(analysis) {
  return analysis
    .flat()
    .map((entry) => {
      const tags = Object.values(entry.tags)
        .filter((value) => value !== undefined)
        .map((value) => value.split(".")[0])
        .join(":");
      const columns = [
        entry.start, entry.end, entry.segment, entry.lemma, tags,
        entry.frequency, entry.qualifiers, "0.0", "_", "_", "_",
      ];
      return columns.join("\t");
    })
    .join("\n");
}

/**
 * Parses the tab-separated text returned by Concraft. Columns are named after
 * `CONCRAFT_COLUMNS`; a lone `_` becomes an empty string. `start`/`end` are
 * converted to integers, `prob` to a float, `eos`/`disamb` to booleans, and
 * `tags` is parsed while the raw tag string is kept as `rtags`.
 *
 * @param {string} text - Raw tagger output.
 * @returns {object[]} One element per non-blank line.
 */
function parseConcraftOutput(text) {
  const elements = [];
  for (const line of text.split(LINE_BREAK)) {
    if (line.trim().length === 0) {
      continue;
    }
    const element = Object.fromEntries(
      line.split(/\t/).map((prop, index) => [CONCRAFT_COLUMNS[index], prop === '_' ? '' : prop]),
    );
    element.start = Number.parseInt(element.start, 10);
    element.end = Number.parseInt(element.end, 10);
    element.disamb = element.disamb === 'disamb';
    element.eos = element.eos === 'eos';
    element.prob = Number.parseFloat(element.prob);
    element.rtags = element.tags;
    element.tags = parseTags(element.rtags);
    elements.push(element);
  }
  return elements;
}

/**
 * Sends an analysis through Concraft (locally when `CONCRAFT_MODE` is
 * 'local', otherwise over HTTP) and parses the tagger's answer.
 *
 * @param {object[][]} analysis - Segments as returned by `analyse`.
 * @returns {Promise<object[]>} Flat list of tagged interpretations.
 */
async function desambiguate(analysis) {
  const input = toConcraftInput(analysis);
  const tagger = CONCRAFT_MODE === 'local' ? runConcraftLocally : invokeConcraftRemotely;
  const response = await tagger(input);
  return parseConcraftOutput(response);
}

/**
 * Analyses `input` and passes the result through `desambiguate`.
 *
 * @param {string} input - Text to analyse.
 * @returns {Promise<object[]>} Flat list of tagged interpretations.
 */
async function danalyse(input) {
  const analysis = await analyse(input);
  return desambiguate(analysis);
}

/**
 * Runs the generator binary on `input` and returns the produced forms whose
 * raw tag string contains every one of `tags` (substring match).
 *
 * @param {string} input - Text passed to the generator on stdin.
 * @param {...string} tags - Substrings that must all occur in an entry's tags.
 * @returns {object[]} Flat list of matching entries; `tags` stays a raw string.
 * @throws {Error} When the binary cannot be run or its output is malformed.
 */
function generate(input, ...tags) {
  const { stdout } = runBinary(MORFEUSZ_GENERATOR_BIN, [], input);
  return parseSegments(stdout.toString(), GENERATOR_LINE_PATTERN, (data) => data)
    .flat()
    .filter((entry) => tags.every((tag) => entry.tags.includes(tag)));
}

/**
 * Parses a colon-separated tag string such as `subst:sg:nom:f` into an object.
 * The first component becomes `type`; the following ones are assigned, by
 * position, to the attribute names registered for that type. Attributes
 * missing from the string are present with value `undefined`; surplus
 * components and components of unknown types are dropped.
 *
 * @param {string} tags - Raw tag string.
 * @returns {object} `{ type, ...attributes }`.
 */
function parseTags(tags) {
  const [type, ...rest] = tags.split(":");
  // Own-property check so that types like "toString" do not hit the prototype.
  const names = Object.hasOwn(TAG_ATTRIBUTES, type) ? TAG_ATTRIBUTES[type] : [];
  return { type, ...Object.fromEntries(names.map((name, index) => [name, rest[index]])) };
}

/**
 * Command-line entry point: dispatches `action` to the matching function with
 * the remaining arguments and prints the result as indented JSON.
 *
 * @param {string[]} argv - `process.argv`-shaped array:
 *   `[interpreter, script, action, ...args]`.
 * @throws {Error} When `action` is not one of analyse, danalyse, generate.
 */
async function main([interpreter, script, action, ...args]) {
  const actions = { analyse, danalyse, generate };
  // Own-property check so that e.g. "toString" is rejected as unknown.
  if (!Object.hasOwn(actions, action)) {
    throw new Error(`Unknown action: '${action}'`);
  }
  const output = await actions[action](...args);
  console.log(JSON.stringify(output, undefined, 2));
}

// Run the CLI only when this file is executed directly, so that loading it
// from elsewhere (e.g. a test harness) has no side effects.
if (typeof module !== "undefined" && require.main === module) {
  main(process.argv).catch((error) => {
    console.error(error);
    process.exitCode = 1;
  });
}