-
Notifications
You must be signed in to change notification settings - Fork 5
/
glossaries.js
189 lines (170 loc) · 6 KB
/
glossaries.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
// Copyright 2022 DeepL SE (https://www.deepl.com)
// Use of this source code is governed by an MIT
// license that can be found in the LICENSE file.
const uuid = require('uuid');
const csvParser = require('csv-parser');
const { Readable } = require('stream');
const util = require('./util');
const languages = require('./languages');
// Module-level store of every glossary, keyed by glossary ID.
const glossaries = new Map();
// Pass the store to util.scheduleCleanup together with a callback that logs the
// glossary it is invoked for (presumably as that glossary is evicted — confirm in ./util).
util.scheduleCleanup(glossaries, (staleGlossary, staleId) => {
  console.log(`Removing glossary "${staleGlossary.name}" (${staleId})`);
});
/**
 * Looks up the target text stored for a source term.
 *
 * @param {Array<{source: string, target: string}>} entryList - Entries to search.
 * @param {string} sourceEntry - Source term to match with strict equality.
 * @returns {string|undefined} Target of the first entry whose source matches,
 *   or undefined when no entry matches.
 */
function findEntry(entryList, sourceEntry) {
  const match = entryList.find((candidate) => candidate.source === sourceEntry);
  return match === undefined ? undefined : match.target;
}
/**
 * Serializes glossary entries to TSV: one "source<TAB>target" line per entry,
 * lines joined with "\n" and no trailing newline.
 *
 * @param {Array<{source: string, target: string}>} entriesList - Entries to serialize.
 * @returns {string} TSV text; the empty string for an empty list.
 */
function convertListToGlossaryTsv(entriesList) {
  const lines = entriesList.map(({ source, target }) => [source, target].join('\t'));
  return lines.join('\n');
}
/**
 * Parses glossary entries given as TSV text into a list of entries.
 *
 * Each non-blank line must have the form "source<TAB>target". Lines are trimmed
 * before parsing and blank lines are skipped. The line is split at its first tab,
 * so any further tabs stay part of the target.
 *
 * @param {string} entriesTsv - Glossary entries, one per line.
 * @returns {Array<{source: string, target: string}>} Entries in input order.
 * @throws {util.HttpError} 400 when entriesTsv is empty, when a non-blank line has
 *   no tab separator, or when a source term appears more than once.
 */
function convertGlossaryTsvToList(entriesTsv) {
  // split() always yields at least one element, so emptiness has to be checked on
  // the input itself (same check as the CSV parser uses).
  if (entriesTsv.length === 0) {
    throw new util.HttpError('Bad request', 400, 'Missing or invalid argument: entries');
  }
  const entryList = [];
  // Line index at which each accepted source term was seen; gives O(1) duplicate
  // detection and lets the error message name the entry being duplicated.
  const indexBySource = new Map();
  const entries = entriesTsv.split('\n');
  for (let entryIndex = 0; entryIndex < entries.length; entryIndex += 1) {
    // TODO Implement calculation of entry positions. Until then every position in
    // the error messages below is reported as 0.
    const entryPosition = 0;
    const entry = entries[entryIndex].trim();
    if (entry !== '') {
      const tabPosition = entry.indexOf('\t');
      if (tabPosition === -1) {
        throw new util.HttpError('Invalid glossary entries provided', 400,
          `Key with the index ${entryIndex} (starting at position ${entryPosition}) misses tab separator`);
      }
      const source = entry.slice(0, tabPosition);
      const target = entry.slice(tabPosition + 1);
      const duplicatedIndex = indexBySource.get(source);
      if (duplicatedIndex !== undefined) {
        throw new util.HttpError('Invalid glossary entries provided', 400,
          `Key with the index ${entryIndex} (starting at position ${entryPosition}) duplicates key with the index ${duplicatedIndex} (starting at position ${entryPosition})`);
      }
      indexBySource.set(source, entryIndex);
      entryList.push({ source, target });
    }
  }
  return entryList;
}
/**
 * Parses glossary entries given as CSV text into a list of entries.
 *
 * Columns are read by position: source term, target term and, optionally, source
 * language and target language. Rows lacking a source or target term are skipped.
 * Rows that carry both language columns are skipped unless both languages match
 * the glossary's languages.
 *
 * @param {string} entriesCsv - Glossary entries as CSV without a header row.
 * @param {string} glossarySourceLang - Glossary source language; compared against the
 *   upper-cased CSV value, so it must be upper-case to match.
 * @param {string} glossaryTargetLang - Glossary target language; same comparison rule.
 * @returns {Promise<Array<{source: string, target: string}>>} Resolves with the
 *   accepted entries in input order. Rejects with a 400 util.HttpError when no row
 *   is accepted, or with the stream error if parsing fails.
 * @throws {util.HttpError} 400, thrown synchronously, when entriesCsv is empty.
 */
function convertGlossaryCsvToList(entriesCsv, glossarySourceLang, glossaryTargetLang) {
  if (entriesCsv.length === 0) {
    throw new util.HttpError('Bad request', 400, 'Missing or invalid argument: entries');
  }
  return new Promise((resolve, reject) => {
    const results = [];
    Readable.from([entriesCsv])
      .pipe(csvParser({ headers: false }))
      .on('data', (row) => {
        // With headers disabled the parser keys each row by column index.
        const {
          0: sourceEntry, 1: targetEntry, 2: sourceLang, 3: targetLang,
        } = row;
        // Ignore empty lines
        if (sourceEntry === undefined || targetEntry === undefined) return;
        // Ignore lines where the source lang or target lang do not match glossary lang.
        // A mismatch in either column is enough to drop the row.
        if (sourceLang !== undefined && targetLang !== undefined
          && (sourceLang.toUpperCase() !== glossarySourceLang
            || targetLang.toUpperCase() !== glossaryTargetLang)) return;
        results.push({ source: sourceEntry, target: targetEntry });
      })
      .on('end', () => {
        if (results.length === 0) {
          return reject(new util.HttpError('Invalid glossary entries provided', 400));
        }
        return resolve(results);
      })
      .on('error', (err) => reject(err));
  });
}
/**
 * Builds the snake_case summary of a stored glossary record.
 *
 * Language codes are lower-cased, the creation time is rendered as an ISO 8601
 * string, and the entries are reduced to their count. The auth key and the
 * entries themselves are not included.
 *
 * @param {object} glossary - Stored glossary record.
 * @returns {object} Summary with glossary_id, name, ready, target_lang,
 *   source_lang, creation_time and entry_count.
 */
function extractGlossaryInfo(glossary) {
  const {
    glossaryId, name, ready, targetLang, sourceLang, created, entryList,
  } = glossary;
  const info = {
    glossary_id: glossaryId,
    name,
    ready,
    target_lang: targetLang.toLowerCase(),
    source_lang: sourceLang.toLowerCase(),
    creation_time: created.toISOString(),
    entry_count: entryList.length,
  };
  return info;
}
/**
 * Reports whether a glossary ID passes uuid.validate.
 *
 * @param {string} glossaryId - Candidate glossary ID.
 * @returns {boolean} The result of uuid.validate for the given ID.
 */
function isValidGlossaryId(glossaryId) {
  const isWellFormed = uuid.validate(glossaryId);
  return isWellFormed;
}
/**
 * Translates an input by exact match against the glossary entries.
 *
 * @param {Array<{source: string, target: string}>} entryList - Glossary entries.
 * @param {string} input - Text to look up; compared with strict equality against
 *   each entry's source.
 * @returns {string|null} Target of the first matching entry, or null when no
 *   entry matches.
 */
function translateWithGlossary(entryList, input) {
  const hit = entryList.find(({ source }) => source === input);
  if (hit === undefined) {
    return null;
  }
  return hit.target;
}
/**
 * Creates a glossary, stores it in the module-level map and returns its summary.
 *
 * @param {string} name - Glossary name.
 * @param {string} authKey - Auth key recorded as the owner of the glossary.
 * @param {string} targetLang - Target language code.
 * @param {string} sourceLang - Source language code.
 * @param {string} entriesFormat - 'tsv' selects the TSV parser; any other value is
 *   handled by the CSV parser.
 * @param {string} entries - Glossary entries in the given format.
 * @returns {Promise<object>} Summary produced by extractGlossaryInfo.
 * @throws {util.HttpError} 400 when the language pair is not supported; errors
 *   raised by the entry parsers propagate unchanged.
 */
async function createGlossary(name, authKey, targetLang, sourceLang, entriesFormat, entries) {
  const isSupportedPair = languages.isGlossarySupportedLanguagePair(sourceLang, targetLang);
  if (!isSupportedPair) {
    throw new util.HttpError('Unsupported glossary source and target language pair', 400);
  }

  // Only the CSV parser is asynchronous, so only that branch is awaited.
  const entryList = entriesFormat === 'tsv'
    ? convertGlossaryTsvToList(entries)
    : await convertGlossaryCsvToList(entries, sourceLang, targetLang);

  const glossaryId = uuid.v1();
  const glossary = {
    glossaryId,
    name,
    created: new Date(),
    used: new Date(),
    ready: true,
    authKey,
    sourceLang,
    targetLang,
    entryList,
    // Reads entryList through the record so a later replacement of it is honoured.
    translate: (input) => translateWithGlossary(glossary.entryList, input),
  };

  // Add glossary to list
  glossaries.set(glossaryId, glossary);
  console.log(`Created glossary "${glossary.name}" (${glossaryId})`);
  return extractGlossaryInfo(glossary);
}
/**
 * Fetches a stored glossary owned by the given auth key and refreshes its
 * last-used timestamp.
 *
 * @param {string} glossaryId - ID of the glossary to fetch.
 * @param {string} authKey - Auth key that must equal the one the glossary was created with.
 * @returns {object} The stored glossary record.
 * @throws {util.HttpError} 404 when no glossary has this ID or it belongs to a
 *   different auth key; the two cases are not distinguished.
 */
function getGlossary(glossaryId, authKey) {
  const glossary = glossaries.get(glossaryId);
  // Check existence explicitly: comparing `glossary?.authKey` alone would treat a
  // missing glossary as a match whenever authKey is undefined.
  if (glossary === undefined || glossary.authKey !== authKey) {
    throw new util.HttpError('not found', 404);
  }
  glossary.used = new Date();
  return glossary;
}
/**
 * Returns the summary of a single glossary.
 *
 * @param {string} glossaryId - ID of the glossary.
 * @param {string} authKey - Auth key passed through to getGlossary.
 * @returns {object} Summary produced by extractGlossaryInfo.
 */
function getGlossaryInfo(glossaryId, authKey) {
  const glossary = getGlossary(glossaryId, authKey);
  return extractGlossaryInfo(glossary);
}
/**
 * Lists the summaries of all glossaries created with the given auth key.
 *
 * @param {string} authKey - Auth key to filter by (strict equality).
 * @returns {Array<object>} Summaries in the store's insertion order; an empty
 *   array when the key owns no glossaries.
 */
function getGlossaryInfoList(authKey) {
  return Array.from(glossaries.values())
    .filter((glossary) => glossary.authKey === authKey)
    .map((glossary) => extractGlossaryInfo(glossary));
}
/**
 * Returns the entries of a glossary serialized as TSV.
 *
 * @param {string} glossaryId - ID of the glossary.
 * @param {string} authKey - Auth key passed through to getGlossary.
 * @returns {string} TSV text produced by convertListToGlossaryTsv.
 */
function getGlossaryEntries(glossaryId, authKey) {
  const { entryList } = getGlossary(glossaryId, authKey);
  return convertListToGlossaryTsv(entryList);
}
/**
 * Deletes a glossary from the store, logging before and after the deletion.
 *
 * @param {string} glossaryId - ID of the glossary to delete.
 * @param {string} authKey - Auth key passed through to getGlossary, which throws
 *   before anything is deleted if the lookup fails.
 * @returns {void}
 */
function removeGlossary(glossaryId, authKey) {
  const { name } = getGlossary(glossaryId, authKey);
  console.log(`Removing glossary "${name}" (${glossaryId})`);
  glossaries.delete(glossaryId);
  console.log('Done');
}
// Public API of this module. The entry parsing/formatting helpers and the
// glossary store itself are not exported.
module.exports = {
createGlossary,
isValidGlossaryId,
getGlossary,
getGlossaryInfo,
getGlossaryInfoList,
getGlossaryEntries,
removeGlossary,
};