-
Notifications
You must be signed in to change notification settings - Fork 5
/
languages.js
184 lines (167 loc) · 6.7 KB
/
languages.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
// Copyright 2022 DeepL SE (https://www.deepl.com)
// Use of this source code is governed by an MIT
// license that can be found in the LICENSE file.
/**
 * Language table of the mock server, keyed by upper-case language code.
 *
 * Fields of each entry:
 *  - name: display name (ZH instead carries separate source_name / target_name)
 *  - type: 'source', 'target' or 'both' - in which direction the code may be used
 *  - formality: present (true) only for languages flagged as supporting formality
 *  - text: the fixed text every translation into this language yields; it is also
 *    matched against the start of the input when guessing the source language
 *
 * Insertion order matters: it is the order of the language listings and of the
 * source-language detection scan.
 */
const languages = new Map(Object.entries({
  AR: { name: 'Arabic', type: 'both', text: 'شعاع البروتون' },
  BG: { name: 'Bulgarian', type: 'both', text: 'протонен лъч' },
  CS: { name: 'Czech', type: 'both', text: 'protonový paprsek' },
  DA: { name: 'Danish', type: 'both', text: 'protonstråle' },
  DE: { name: 'German', type: 'both', formality: true, text: 'Protonenstrahl' },
  EL: { name: 'Greek', type: 'both', text: 'δέσμη πρωτονίων' },
  EN: { name: 'English', type: 'source', text: 'proton beam' },
  'EN-GB': { name: 'English (British)', type: 'target', text: 'proton beam' },
  'EN-US': { name: 'English (American)', type: 'target', text: 'proton beam' },
  ES: { name: 'Spanish', type: 'both', formality: true, text: 'haz de protones' },
  ET: { name: 'Estonian', type: 'both', text: 'prootonikiirgus' },
  FI: { name: 'Finnish', type: 'both', text: 'protonisäde' },
  FR: { name: 'French', type: 'both', formality: true, text: 'faisceau de protons' },
  HU: { name: 'Hungarian', type: 'both', text: 'protonnyaláb' },
  ID: { name: 'Indonesian', type: 'both', text: 'berkas proton' },
  IT: { name: 'Italian', type: 'both', formality: true, text: 'fascio di protoni' },
  JA: { name: 'Japanese', type: 'both', formality: true, text: '陽子ビーム' },
  KO: { name: 'Korean', type: 'both', text: '양성자 빔' },
  LT: { name: 'Lithuanian', type: 'both', text: 'protonų spindulys' },
  LV: { name: 'Latvian', type: 'both', text: 'protonu staru kūlis' },
  NB: { name: 'Norwegian (bokmål)', type: 'both', text: 'protonstråle' },
  NL: { name: 'Dutch', type: 'both', formality: true, text: 'protonenbundel' },
  PL: { name: 'Polish', type: 'both', formality: true, text: 'wiązka protonów' },
  PT: { name: 'Portuguese', type: 'source', text: 'feixe de prótons' },
  'PT-BR': {
    name: 'Portuguese (Brazilian)',
    type: 'target',
    formality: true,
    text: 'feixe de prótons',
  },
  'PT-PT': {
    name: 'Portuguese (European)',
    type: 'target',
    formality: true,
    text: 'feixe de prótons',
  },
  RO: { name: 'Romanian', type: 'both', text: 'fascicul de protoni' },
  RU: { name: 'Russian', type: 'both', formality: true, text: 'протонный луч' },
  SK: { name: 'Slovak', type: 'both', text: 'protónový lúč' },
  SL: { name: 'Slovenian', type: 'both', text: 'protonski žarek' },
  SV: { name: 'Swedish', type: 'both', text: 'protonstråle' },
  TR: { name: 'Turkish', type: 'both', text: 'proton ışını' },
  UK: { name: 'Ukrainian', type: 'both', text: 'протонний пучок' },
  ZH: {
    source_name: 'Chinese',
    target_name: 'Chinese (simplified)',
    type: 'both',
    text: '质子束',
  },
}));
// Lower-case codes of the languages that may take part in a glossary.
const glossaryLanguages = [
  'da', 'de', 'en', 'es', 'fr', 'it', 'ja',
  'nb', 'nl', 'pl', 'pt', 'ru', 'sv', 'zh',
];

// Every ordered (source, target) combination of two different glossary languages,
// grouped by source in the order of the list above.
const glossaryLanguagePairs = [];
glossaryLanguages.forEach((sourceCode) => {
  glossaryLanguages.forEach((targetCode) => {
    if (sourceCode !== targetCode) {
      glossaryLanguagePairs.push({ source_lang: sourceCode, target_lang: targetCode });
    }
  });
});
/**
 * Tells whether a language code may be used as the source language.
 *
 * @param {string|undefined} langCode - Language code in any letter case.
 * @returns {boolean} true when the code is undefined or names a language whose
 *   type is 'source' or 'both'; false otherwise.
 */
function isSourceLanguage(langCode) {
  // Leaving source_lang unspecified activates auto-detect, which is always allowed
  if (langCode === undefined) {
    return true;
  }
  const entry = languages.get(langCode.toUpperCase());
  if (entry === undefined) {
    return false;
  }
  return entry.type === 'source' || entry.type === 'both';
}
/**
 * Tells whether a language code may be used as the target language.
 *
 * @param {string|undefined} langCode - Language code in any letter case.
 * @returns {boolean} true when the code names a language whose type is 'target'
 *   or 'both'; false for undefined or unknown codes.
 */
function isTargetLanguage(langCode) {
  if (langCode === undefined) {
    return false;
  }
  const entry = languages.get(langCode.toUpperCase());
  if (entry === undefined) {
    return false;
  }
  return entry.type === 'target' || entry.type === 'both';
}
/**
 * Tells whether a language code is one of the glossary languages.
 *
 * @param {string|undefined} langCode - Language code in any letter case.
 * @returns {boolean} true when the lower-cased code is in the glossary language
 *   list; false for undefined or unlisted codes.
 */
function isGlossaryLanguage(langCode) {
  return langCode === undefined
    ? false
    : glossaryLanguages.includes(langCode.toLowerCase());
}
/**
 * Tells whether both languages of a pair are glossary languages.
 *
 * Only membership of each code in the glossary language list is checked; the two
 * codes are not compared with each other.
 *
 * @param {string|undefined} sourceLang - Source language code in any letter case.
 * @param {string|undefined} targetLang - Target language code in any letter case.
 * @returns {boolean} true when both lower-cased codes are glossary languages;
 *   false otherwise, including when either code is undefined.
 */
function isGlossarySupportedLanguagePair(sourceLang, targetLang) {
  // Like the other language predicates in this module, treat a missing code as
  // "not supported" instead of failing on toLowerCase()
  if (sourceLang === undefined || targetLang === undefined) return false;
  return glossaryLanguages.includes(sourceLang.toLowerCase())
    && glossaryLanguages.includes(targetLang.toLowerCase());
}
/**
 * Tells whether a formality setting is acceptable for a language.
 *
 * @param {string|undefined} langCode - Language code in any letter case.
 * @param {string|undefined} formality - Requested formality value.
 * @returns {boolean} false when langCode is undefined; true when formality is
 *   'default' or starts with 'prefer_'; otherwise whether the language is known
 *   and its entry has a formality flag.
 */
function supportsFormality(langCode, formality) {
  if (langCode === undefined) {
    return false;
  }

  // 'default' and the 'prefer_*' values are accepted regardless of the language
  const acceptedForAnyLanguage = formality === 'default'
    || (formality !== undefined && formality.startsWith('prefer_'));
  if (acceptedForAnyLanguage) {
    return true;
  }

  const entry = languages.get(langCode.toUpperCase());
  return entry !== undefined && entry.formality !== undefined;
}
/**
 * Lists the languages usable as a translation source, in table order.
 *
 * @returns {{language: string, name: string}[]} One item per language whose type
 *   is 'source' or 'both'; name is the entry's source_name when it has one,
 *   otherwise its name.
 */
function getSourceLanguages() {
  return [...languages.entries()]
    .filter(([, lang]) => lang.type === 'source' || lang.type === 'both')
    .map(([code, lang]) => ({
      language: code,
      name: lang.source_name ?? lang.name,
    }));
}
/**
 * Lists the languages usable as a translation target, in table order.
 *
 * @returns {{language: string, name: string, supports_formality: boolean}[]} One
 *   item per language whose type is 'target' or 'both'; name is the entry's
 *   target_name when it has one, otherwise its name; supports_formality is the
 *   entry's formality flag coerced to a boolean.
 */
function getTargetLanguages() {
  const usableAsTarget = ([, lang]) => lang.type === 'target' || lang.type === 'both';
  const describe = ([code, lang]) => ({
    language: code,
    name: lang.target_name ?? lang.name,
    supports_formality: Boolean(lang.formality),
  });
  return [...languages.entries()].filter(usableAsTarget).map(describe);
}
/**
 * Returns the list of glossary language pairs.
 *
 * The module-level glossaryLanguagePairs array itself is returned, not a copy.
 *
 * @returns {{source_lang: string, target_lang: string}[]} The glossary language pairs.
 */
function getGlossaryLanguagePairs() {
return glossaryLanguagePairs;
}
/**
 * Produces the mock translation of a single line.
 *
 * @param {string} input - The line to translate.
 * @param {string} targetLang - Target language code, in any letter case; it must
 *   name an entry of the language table, otherwise the lookup throws a TypeError.
 * @param {object} [glossary] - Optional glossary; as used here it must offer a
 *   translate(input) method whose falsy result means "no glossary translation".
 * @returns {string} '' for an empty line, else the glossary translation when the
 *   glossary yields one, else the fixed text of the target language.
 */
function translateLine(input, targetLang, glossary) {
  if (input === '') return '';
  if (glossary) {
    const glossaryResult = glossary.translate(input);
    if (glossaryResult) return glossaryResult;
  }
  // Mock server simplification: each input text is translated to a fixed text for the target
  // language.
  // The table is keyed by upper-case codes while the language predicates accept any
  // letter case, so normalise before the lookup to stay consistent with them.
  return languages.get(targetLang.toUpperCase()).text;
}
/**
 * Produces the mock translation of a (possibly multi-line) text and logs a
 * shortened summary to the console.
 *
 * @param {string} input - Text to translate; each '\n'-separated line is
 *   translated on its own via translateLine.
 * @param {string} targetLang - Target language code, handed to translateLine.
 * @param {string} [sourceLangIn] - Source language code; when falsy and glossary
 *   is undefined, the source language is guessed from the input.
 * @param {object} [glossary] - Optional glossary, handed to translateLine.
 * @returns {{detected_source_language: (string|undefined), text: string}} The
 *   given or guessed source language together with the translated text.
 */
function translate(input, targetLang, sourceLangIn, glossary) {
  // Texts of 50 or more characters are cut to 47 characters plus an ellipsis for the log
  const shorten = (value) => (value.length < 50 ? value : `${value.slice(0, 47)}...`);

  let sourceLang = sourceLangIn;
  if (!sourceLang && glossary === undefined) {
    // Mock server simplification: the first language whose test string starts the input
    // is taken as the source; if no test string matches, assume the source text is English
    const hit = [...languages.entries()].find(([, lang]) => input.startsWith(lang.text));
    sourceLang = hit ? hit[0] : 'EN';
  }

  // Translate line by line, keeping the line structure of the input
  const lines = input.split('\n');
  const translatedLines = lines.map((line) => translateLine(line, targetLang, glossary));
  const text = translatedLines.join('\n');

  console.log(`Translated "${shorten(input)}" to "${shorten(text)}"`);
  return { detected_source_language: sourceLang, text };
}
// Public interface of this module: the language predicates, the language and
// glossary-pair listings, and the mock translate() entry point. The language
// table and translateLine() are not exported.
module.exports = {
isGlossaryLanguage,
isSourceLanguage,
getSourceLanguages,
isTargetLanguage,
supportsFormality,
getTargetLanguages,
getGlossaryLanguagePairs,
isGlossarySupportedLanguagePair,
translate,
};