Add tags parsing
This commit is contained in:
55
index.js
55
index.js
@@ -6,8 +6,7 @@ async function analyse(input) {
|
|||||||
const linePattern = /^\s?(?<opening>\[)?(?<start>\d+),(?<end>\d+),(?<segment>,|(.+?)),(?<lemma>,|(.+?)),(?<tags>[\w:.]+),(?<frequency>_|(.+?)),(?<qualifiers>_|(.+?))(?<ending>\])?$/;
|
const linePattern = /^\s?(?<opening>\[)?(?<start>\d+),(?<end>\d+),(?<segment>,|(.+?)),(?<lemma>,|(.+?)),(?<tags>[\w:.]+),(?<frequency>_|(.+?)),(?<qualifiers>_|(.+?))(?<ending>\])?$/;
|
||||||
|
|
||||||
const output = [];
|
const output = [];
|
||||||
let segment = undefined;
|
let segment = undefined;
|
||||||
let inSegment = false;
|
|
||||||
|
|
||||||
for (const line of stdout.toString().split(/\r?\n|\r/)) {
|
for (const line of stdout.toString().split(/\r?\n|\r/)) {
|
||||||
if (line.trim().length === 0) {
|
if (line.trim().length === 0) {
|
||||||
@@ -27,7 +26,7 @@ async function analyse(input) {
|
|||||||
segment = [];
|
segment = [];
|
||||||
}
|
}
|
||||||
|
|
||||||
segment.push(data);
|
segment.push({ ...data, tags: parseTags(data.tags), rtags: data.tags });
|
||||||
|
|
||||||
if (ending) {
|
if (ending) {
|
||||||
inSegment = false;
|
inSegment = false;
|
||||||
@@ -42,6 +41,12 @@ async function analyse(input) {
|
|||||||
/**
 * Runs the Concraft binary (`CONCRAFT_BIN`) with the arguments
 * `tag <CONCRAFT_MODEL>`, feeding `input` to the child process, and returns
 * whatever the process wrote to standard output.
 *
 * NOTE(review): assumes `spawnSync` is Node's `child_process.spawnSync` —
 * confirm against the file's imports.
 *
 * @param {string|Buffer} input - Data passed to the process via the `input` option.
 * @returns {Promise<string>} The process's standard output as a string.
 * @throws {Error} The spawn error when the process could not be started, or an
 *   `Error` carrying the stderr text when the process wrote anything
 *   non-blank to standard error.
 */
async function runConcraftLocally(input) {
    const { stdout, stderr, error: spawnError } = spawnSync(CONCRAFT_BIN, ["tag", CONCRAFT_MODEL], { input });

    // If the process could not be started (e.g. the binary is missing),
    // stdout and stderr are null; surface the real cause instead of failing
    // with a TypeError on `stderr.toString()` below.
    if (spawnError) {
        throw spawnError;
    }

    const error = stderr.toString();

    // Any non-blank stderr output is treated as a failure.
    if (error.trim().length > 0) {
        throw new Error(error);
    }

    return stdout.toString();
}
|
||||||
|
|
||||||
@@ -63,8 +68,8 @@ async function invokeConcraftRemotely(dag) {
|
|||||||
async function desambiguate(analysis) {
|
async function desambiguate(analysis) {
|
||||||
const input = analysis
|
const input = analysis
|
||||||
.flatMap(a => a)
|
.flatMap(a => a)
|
||||||
.map(a => ({ ...a, tags: a.tags.split(":").map(t => t.split(".")[0]).join(":") }))
|
.map(a => ({ ...a, tags: Object.values(a.tags).filter(t => t).map(tag => tag.split(".")[0]).join(":") }))
|
||||||
.map(a => `${[...Object.values(a), "0.0", "_", "_", "_"].join("\t")}`)
|
.map(a => `${[...Object.keys(a).filter(k => !['rtags'].includes(k)).map(k => a[k]), "0.0", "_", "_", "_"].join("\t")}`)
|
||||||
.join("\n");
|
.join("\n");
|
||||||
|
|
||||||
|
|
||||||
@@ -84,7 +89,9 @@ async function desambiguate(analysis) {
|
|||||||
element.end = Number.parseInt(element.end);
|
element.end = Number.parseInt(element.end);
|
||||||
element.disamb = element.disamb === 'disamb';
|
element.disamb = element.disamb === 'disamb';
|
||||||
element.eos = element.eos === 'eos';
|
element.eos = element.eos === 'eos';
|
||||||
element.prob = Number.parseFloat(element.prob);
|
element.prob = Number.parseFloat(element.prob);
|
||||||
|
element.rtags = element.tags;
|
||||||
|
element.tags = parseTags(element.rtags);
|
||||||
|
|
||||||
output.push(element);
|
output.push(element);
|
||||||
}
|
}
|
||||||
@@ -137,6 +144,42 @@ function generate(input, ...tags) {
|
|||||||
return output.flatMap(a => a).filter(entry => tags.every(tag => entry.tags.includes(tag)));
|
return output.flatMap(a => a).filter(entry => tags.every(tag => entry.tags.includes(tag)));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Splits a colon-separated tag string into a structured object.
 *
 * The first component becomes `type`. For the types listed in the table
 * below, the remaining components are assigned positionally to named
 * fields. A field whose position is missing from the input is still present
 * on the result with the value `undefined`; components beyond the known
 * fields are ignored. Unknown types yield just `{ type }`.
 *
 * @param {string} tags - Colon-separated tag string.
 * @returns {Object} An object with `type` first, followed by the named
 *   fields for that type in positional order.
 */
function parseTags(tags) {
    const [type, ...rest] = tags.split(":");

    // Positional field names per tag type. The table has a null prototype so
    // that a type such as "toString", "constructor" or "__proto__" is never
    // resolved through Object.prototype.
    const fieldsByType = {
        __proto__: null,
        adv: ["degree"],
        imps: ["aspect"],
        inf: ["aspect"],
        pant: ["aspect"],
        pcon: ["aspect"],
        qub: ["vocalicity"],
        prep: ["case", "vocalicity"],
        siebie: ["case"],
        subst: ["number", "case", "gender"],
        depr: ["number", "case", "gender"],
        ger: ["number", "case", "gender", "aspect", "negation"],
        ppron12: ["number", "case", "gender", "person", "accentability"],
        ppron3: ["number", "case", "gender", "person", "accentability", "postprepositionality"],
        num: ["number", "case", "gender", "accommodability"],
        numcol: ["number", "case", "gender", "accommodability"],
        adj: ["number", "case", "gender", "degree"],
        pact: ["number", "case", "gender", "aspect", "negation"],
        ppas: ["number", "case", "gender", "aspect", "negation"],
        winien: ["number", "gender", "aspect"],
        praet: ["number", "gender", "aspect", "agglutination"],
        bedzie: ["number", "person", "aspect"],
        fin: ["number", "person", "aspect"],
        impt: ["number", "person", "aspect"],
        aglt: ["number", "person", "aspect", "vocalicity"],
    };

    const parsed = { type };
    const fields = fieldsByType[type] ?? [];

    // Assign every known field, even when its position is absent, so the
    // result keeps the same keys (with undefined values) for short inputs.
    fields.forEach((field, index) => {
        parsed[field] = rest[index];
    });

    return parsed;
}
|
||||||
|
|
||||||
async function main([ interpreter, script, action, ...args ]) {
|
async function main([ interpreter, script, action, ...args ]) {
|
||||||
const fn = {
|
const fn = {
|
||||||
analyse,
|
analyse,
|
||||||
|
|||||||
Reference in New Issue
Block a user