diff --git a/index.js b/index.js index f7107d7..d22925f 100755 --- a/index.js +++ b/index.js @@ -6,8 +6,7 @@ async function analyse(input) { const linePattern = /^\s?(?\[)?(?\d+),(?\d+),(?,|(.+?)),(?,|(.+?)),(?[\w:.]+),(?_|(.+?)),(?_|(.+?))(?\])?$/; const output = []; - let segment = undefined; - let inSegment = false; + let segment = undefined; for (const line of stdout.toString().split(/\r?\n|\r/)) { if (line.trim().length === 0) { @@ -27,7 +26,7 @@ async function analyse(input) { segment = []; } - segment.push(data); + /* Each pushed entry keeps the raw tag string under rtags and stores the parseTags result under tags. */ segment.push({ ...data, tags: parseTags(data.tags), rtags: data.tags }); if (ending) { /* NOTE(review): this context line still assigns inSegment, while the first hunk removes its let declaration; unless it is declared in lines not shown here, the assignment targets an undeclared name - confirm. */ inSegment = false; @@ -42,6 +41,12 @@ async function analyse(input) { /** Runs spawnSync on CONCRAFT_BIN with the arguments tag and CONCRAFT_MODEL, passing input as the input option; throws an Error carrying the stderr text when stderr is not blank, otherwise returns stdout converted to a string. */ async function runConcraftLocally(input) { const { stdout, stderr } = spawnSync(CONCRAFT_BIN, ["tag", CONCRAFT_MODEL], { input }); + const error = stderr.toString(); + + if (error?.trim()?.length > 0) { + throw new Error(error); + } + return stdout.toString() } @@ -63,8 +68,8 @@ async function invokeConcraftRemotely(dag) { async function desambiguate(analysis) { const input = analysis .flatMap(a => a) - .map(a => ({ ...a, tags: a.tags.split(":").map(t => t.split(".")[0]).join(":") })) - .map(a => `${[...Object.values(a), "0.0", "_", "_", "_"].join("\t")}`) + /* tags is read as an object here: its falsy values are dropped, each remaining value is cut at its first dot, and the pieces are re-joined with colons; the following map builds a tab-separated row from every key except rtags, followed by four fixed trailing fields. */ .map(a => ({ ...a, tags: Object.values(a.tags).filter(t => t).map(tag => tag.split(".")[0]).join(":") })) + .map(a => `${[...Object.keys(a).filter(k => !['rtags'].includes(k)).map(k => a[k]), "0.0", "_", "_", "_"].join("\t")}`) .join("\n"); @@ -84,7 +89,9 @@ async function desambiguate(analysis) { element.end = Number.parseInt(element.end); element.disamb = element.disamb === 'disamb'; element.eos = element.eos === 'eos'; - element.prob = Number.parseFloat(element.prob); + element.prob = Number.parseFloat(element.prob); + /* Keep the raw tag string under rtags before tags is replaced by its parsed object form. */ element.rtags = element.tags; + element.tags = parseTags(element.rtags); output.push(element); } @@ -137,6 +144,42 @@ function generate(input, ...tags) { return output.flatMap(a => a).filter(entry => tags.every(tag => 
entry.tags.includes(tag))); } /* NOTE(review): parseTags below returns a plain object, which has no includes method; if entry.tags in the filter above is such an object the call would throw - confirm against the code that builds output, which is not visible here. */ +/** Splits a colon-separated tag string into a type (the first field) plus named fields. The remaining fields are handed positionally to the parsers entry keyed by type, which gives each position a property name. Positions missing from the input become properties whose value is undefined, fields beyond the parameters of the entry are ignored, and a type without an entry yields an object holding only type. @param {string} tags - colon-separated tag string; split is called on it directly, so it must not be null or undefined. @returns {object} an object with type and the named fields for that type. */ function parseTags(tags) { + const [type, ...rest] = tags.split(":"); + + /* One entry per known type; the parameter order of each arrow function defines which position maps to which property name. */ const parsers = { + adv: (degree) => ({ degree }), + imps: (aspect) => ({ aspect }), + inf: (aspect) => ({ aspect }), + pant: (aspect) => ({ aspect }), + pcon: (aspect) => ({ aspect }), + qub: (vocalicity) => ({ vocalicity }), + prep: (c, vocalicity) => ({ case: c, vocalicity }), + siebie: (c) => ({ case: c }), + subst: (number, c, gender,) => ({ number, case: c, gender }), + depr: (number, c, gender,) => ({ number, case: c, gender }), + ger: (number, c, gender, aspect, negation) => ({ number, case: c, gender, aspect, negation }), + ppron12: (number, c, gender, person, accentability) => ({ number, case: c, gender, person, accentability }), + ppron3: (number, c, gender, person, accentability, postprepositionality) => ({ number, case: c, gender, person, accentability, postprepositionality }), + num: (number, c, gender, accommodability) => ({ number, case: c, gender, accommodability }), + numcol: (number, c, gender, accommodability) => ({ number, case: c, gender, accommodability }), + adj: (number, c, gender, degree) => ({ number, case: c, gender, degree }), + pact: (number, c, gender, aspect, negation) => ({ number, case: c, gender, aspect, negation }), + ppas: (number, c, gender, aspect, negation) => ({ number, case: c, gender, aspect, negation }), + winien: (number, gender, aspect,) => ({ number, gender, aspect }), + praet: (number, gender, aspect, agglutination) => ({ number, gender, aspect, agglutination }), + bedzie: (number, person, aspect,) => ({ number, person, aspect }), + fin: (number, person, aspect,) => ({ number, person, aspect }), + impt: (number, person, aspect,) => ({ number, person, aspect }), + aglt: (number, person, aspect, vocalicity) => ({ number, person, aspect, vocalicity }), + }; + + /* NOTE(review): parsers is a plain object literal, so the lookup below also resolves inherited members such as constructor or toString - confirm that type can never take one of those names. */ return { + type, + ... (parsers[type]?.(...rest) ?? {}) + } +} + async function main([ interpreter, script, action, ...args ]) { const fn = { analyse,