-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathmain.js
492 lines (470 loc) · 13.9 KB
/
main.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
// Run the sound changer whenever the element with id "apply" is clicked.
document.getElementById("apply").addEventListener("click", applyClicked);
// Enables debug mode: log() only prints while this is true. applyClicked() overwrites it on
// every click from the "enable-debug" checkbox.
window.debug = false;
// Logs all sound changes: logChange() only prints while this is true. applyClicked() overwrites
// it on every click from the "log-changes" checkbox.
window.logChanges = false;
// Last rules (raw and parsed), used to skip rule parsing if rules weren't changed.
// lastRules holds the cleaned-up rule strings and lastPRules the object parseRules() built
// from them; applyClicked() compares the current rules against lastRules.
window.lastRules = undefined;
window.lastPRules = undefined;
/*
 * This is called whenever the "Apply" button is pressed. It gathers the contents of each input element, parses the
 * rules (re-using the previous parse when the rule text has not changed), and then applies the rules to the input
 * words to generate the output. Parse errors are shown in the "errors" element.
 *
 * The raw and the parsed rules are cached together on window.lastRules / window.lastPRules as soon as parsing has
 * finished. Keeping the two writes side by side means a later exception (for example while applying the rules)
 * cannot leave a new raw rule list paired with an old or missing parse.
 *
 * event: the click event; it is not used.
 */
function applyClicked(event) {
  // Get data from each input element
  window.debug = document.getElementById("enable-debug").checked;
  window.logChanges = document.getElementById("log-changes").checked;
  const doNormalize = document.getElementById("normalize").checked;
  const intermediates = document.getElementById("intermediates").checked;
  // Showing intermediate forms forces the "input → output" style on
  const outputStyleEl = document.getElementById("output-style");
  if (intermediates) {
    outputStyleEl.checked = true;
  }
  const outputStyle = outputStyleEl.checked;
  let rules = document.getElementById("rules").value;
  let input = document.getElementById("input").value;
  const outputEl = document.getElementById("output");
  const errorEl = document.getElementById("errors");
  const initTime = new Date();
  log("Starting");

  // If normalize is checked, normalize rules and input
  if (doNormalize) {
    rules = rules.normalize();
    input = input.normalize();
  }
  // Rules: split on newlines and remove spaces
  rules = rules.split("\n").map(a => a.replace(/\s/g, "")).filter(a => a.length > 0);
  // Input: split on all spaces
  input = input.split(/\s/g).filter(a => a.length > 0);
  log(rules);
  log(input);

  const cachedRules = window.lastRules;
  let pRules = window.lastPRules;
  const needsParse =
    cachedRules == undefined // first run
    || pRules == undefined // nothing parsed yet, so the cache is unusable
    || rules.length != cachedRules.length // raw rule lists don't match in length
    || !rules.every((val, index) => val === cachedRules[index]); // raw rule lists aren't equal
  if (needsParse) {
    // Rules need to be parsed again
    pRules = parseRules(rules);
    // Update both halves of the cache together so they always describe the same rules
    window.lastRules = rules;
    window.lastPRules = pRules;
    log(pRules);
    log("New rules parsed");
    // Show errors if any occurred
    if (pRules.errors.length > 0) {
      errorEl.innerHTML = "<div>Errors found during parsing rules:</div>"
        + pRules.errors.map(a => escapeHTML(a)).join("<br />");
      log("Errors found while parsing rules");
    } else {
      errorEl.innerHTML = "";
    }
  }

  // Apply the rules to the input
  const outputResults = applyRules(pRules, input);
  let outputText;
  if (intermediates) {
    // every recorded stage of every word
    outputText = outputResults.map((el, idx) => input[idx] + " → " + el.join(" → ")).join("\n");
  } else if (outputStyle) {
    // input word and final form
    outputText = outputResults.map((el, idx) => input[idx] + " → " + el[el.length - 1]).join("\n");
  } else {
    // final form only
    outputText = outputResults.map(x => x[x.length - 1]).join("\n");
  }
  if (doNormalize) {
    outputText = outputText.normalize();
  }
  outputEl.value = outputText;

  const diffTime = new Date() - initTime;
  console.log("Rules applied. Total time: " + diffTime + " ms.");
}
/*
 * Given a set of parsed rules and some input, apply the rules to the input. Loops through each word, then applies
 * each rule to that word, scanning the word left to right.
 *
 * rules: object as returned by parseRules(); only rules.changes is read. Each entry is either
 *        {intermediate: true} or {find, replace, context, ex_context}.
 * input: array of words (strings).
 *
 * Returns one array per input word: a snapshot of the word for every {intermediate: true} entry encountered,
 * followed by the final form of the word.
 */
function applyRules(rules, input) {
  const output = [];
  for (let word of input) {
    const stages = [];
    output.push(stages);
    for (const change of rules.changes) {
      if (change.intermediate) {
        // Not a sound change: just record what the word looks like at this point
        stages.push(word);
        continue;
      }
      // Split both contexts at the "_" marker. The "before" halves are reversed because they
      // are matched walking leftwards, away from the target.
      const cursorpos = change.context.indexOf("_");
      const ctx_before = change.context.slice(0, cursorpos).reverse();
      const ctx_after = change.context.slice(cursorpos + 1);
      const ex_cursorpos = change.ex_context.indexOf("_");
      const ex_ctx_before = change.ex_context.slice(0, ex_cursorpos).reverse();
      const ex_ctx_after = change.ex_context.slice(ex_cursorpos + 1);
      const hasException = ex_ctx_before.length > 0 || ex_ctx_after.length > 0;

      let result = "";
      let idx = 0;
      // Keep going until the end of the word is reached
      while (idx < word.length) {
        const match = checkTarget(word, change.find, idx);
        let applies = false;
        if (match != null) {
          // "find" found, check context
          const startIdx = idx - 1;
          const endIdx = match[0];
          const inContext =
            (ctx_before.length == 0 || checkContext(word, startIdx, -1, ctx_before))
            && (ctx_after.length == 0 || checkContext(word, endIdx, 1, ctx_after));
          if (inContext) {
            // The exception blocks the change only when BOTH of its halves match. An empty
            // half always matches, so a one-sided exception depends on the other half alone.
            const blocked = hasException
              && checkContext(word, startIdx, -1, ex_ctx_before)
              && checkContext(word, endIdx, 1, ex_ctx_after);
            applies = !blocked;
          }
        }
        if (applies) {
          // perform replacement and continue after the matched target
          result += mkReplacement(change.replace, match[1]);
          idx = match[0];
        } else {
          // target, context or exception ruled the change out here: copy one character
          result += word[idx];
          idx += 1;
        }
      }
      if (word != result) {
        logChange(word, result);
        word = result;
      }
    }
    stages.push(word);
  }
  return output;
}
/*
 * Check if the target matches the word at the specified index.
 *
 * word:     the word being processed.
 * find:     array of target segments ("char", "group" or "wildcard" objects).
 * startIdx: position in word where the match has to start.
 *
 * Returns null when the target does not match. Otherwise returns [endIdx, groupHits], where endIdx is the
 * position just after the matched text and groupHits maps each group index to the numeric position of the
 * option that matched. When the same group index appears again in the target it must match that same option.
 */
function checkTarget(word, find, startIdx) {
  let idx = startIdx;
  const groupHits = {};
  for (const segment of find) {
    if (segment.type == "char") {
      if (segment.name == word[idx]) {
        idx += 1;
      } else if (!segment.optional) {
        return null;
      }
    } else if (segment.type == "group") {
      const groupidx = segment.index;
      const options = segment.elements;
      const rest = word.slice(idx);
      const earlierHit = groupHits[groupidx];
      let found = false;
      if (earlierHit !== undefined) {
        // Back-reference: only the option chosen the first time may match, and the cursor
        // advances by the length of that option.
        const expected = options[earlierHit];
        if (expected !== undefined && rest.startsWith(expected)) {
          idx += expected.length;
          found = true;
        }
      } else {
        // First occurrence: take the first option that matches and remember which one
        for (let i = 0; i < options.length; i++) {
          if (rest.startsWith(options[i])) {
            idx += options[i].length;
            groupHits[groupidx] = i;
            found = true;
            break;
          }
        }
      }
      if (!found && !segment.optional) {
        return null;
      }
    } else if (segment.type == "wildcard") {
      // A wildcard needs an actual character to consume; it must not match past the end
      if (idx < word.length) {
        idx += 1;
      } else if (!segment.optional) {
        return null;
      }
    }
  }
  return [idx, groupHits];
}
/*
 * Create the replacement string, depends on the groups that were matched in the target.
 *
 * replace:   array of replacement segments. "char" segments contribute their name. "group" segments contribute
 *            the option found at the position recorded for their index in groupHits.
 * groupHits: map from group index to the position of the option matched in the target, as built by
 *            checkTarget().
 *
 * A group segment contributes nothing when its index has no recorded hit or when the recorded position is
 * beyond the end of its own option list. Segments of any other type are ignored.
 */
function mkReplacement(replace, groupHits) {
  let res = "";
  for (const segment of replace) {
    if (segment.type == "char") {
      res += segment.name;
    } else if (segment.type == "group") {
      const options = segment.elements;
      const hit = groupHits[segment.index];
      // An undefined hit compares false here, so unmatched groups are skipped
      if (hit < options.length) {
        res += options[hit];
      }
    }
  }
  return res;
}
/*
 * Check if one half of the context matches. This can be called twice per target match, once for the context before
 * the _, once for the context after.
 *
 * word: the word being processed.
 * idx:  position of the first character to compare (the character next to the target on this side).
 * dir:  1 to walk rightwards (context after the _), -1 to walk leftwards (context before the _).
 * ctx:  array of context segments in the order they are met when walking in direction dir.
 *
 * Returns true when every segment matches (an empty ctx always matches), false otherwise. A matching word
 * boundary ends the check immediately with true. Optional segments that do not match are skipped.
 */
function checkContext(word, idx, dir, ctx) {
  // True when pos lies outside the word on the side we are walking towards
  const pastEdge = (pos) => (dir == 1 && pos >= word.length) || (dir == -1 && pos < 0);

  for (const segment of ctx) {
    if (segment.type == "char") {
      if (segment.name == word[idx]) {
        idx += dir;
      } else if (!segment.optional) {
        return false;
      }
    } else if (segment.type == "group") {
      let found = false;
      for (const option of segment.elements) {
        if (option == "#") {
          // "#" inside a group stands for the word boundary
          if (pastEdge(idx)) {
            return true;
          }
        } else if (dir == 1) {
          if (word.slice(idx).startsWith(option)) {
            idx += option.length;
            found = true;
            break;
          }
        } else if (word.slice(0, idx + 1).endsWith(option)) {
          // walking leftwards: the option has to end at idx
          idx -= option.length;
          found = true;
          break;
        }
      }
      if (!found && !segment.optional) {
        return false;
      }
    } else if (segment.type == "boundary") {
      if (pastEdge(idx)) {
        return true;
      } else if (!segment.optional) {
        return false;
      }
    } else if (segment.type == "wildcard") {
      // A wildcard needs a real character; like the other segment types it may be
      // skipped when it is optional.
      if (word[idx] != undefined) {
        idx += dir;
      } else if (!segment.optional) {
        return false;
      }
    }
  }
  return true;
}
// Regexes to detect the next groups. Neither pattern is anchored: mkChangePart() itself
// checks that the match it gets back starts at the beginning of the remaining text.
// nextSegment: one character that is not a digit and not one of / → = { } [ ] _ # , ; ? ( ),
// optionally followed by a run of digits (the segment's index).
const nextSegment = /[^\/→={}\[\]_#,;?0123456789\(\)](?:[0-9]+)?/;
// nextNonce: a "{...}" list written inline in a rule, optionally followed by a run of digits.
// Unlike nextSegment, "," and "#" are allowed between the braces.
const nextNonce = /{[^\/→={}\[\]_;?0123456789\(\)]+}(?:[0-9]+)?/;
/*
 * Parse the list of rules into a JS object.
 *
 * rules: array of rule strings. The first "→" in a rule is treated as "/". Each rule is one of:
 *   ";;..."                     intermediate marker, stored as {intermediate: true}
 *   ";..."                      comment, ignored
 *   "X=..."                     group declaration; the name must be exactly one character
 *   "find/replace[/ctx[/exc]]"  sound change; a missing context or exception context defaults to "_"
 *
 * Returns {changes, errors}: changes holds the intermediate markers and the sound changes
 * ({find, replace, context, ex_context}) in rule order, errors holds one message per problem found.
 * Groups are only visible to the rules that come after their declaration.
 */
function parseRules(rules) {
  const res = {
    changes: [],
    errors: []
  };
  const groups = {};
  for (const rawRule of rules) {
    const rule = rawRule.replace("→", "/");
    if (rule.startsWith(";;")) {
      // intermediate
      res.changes.push({intermediate: true});
      continue;
    }
    if (rule.startsWith(";")) {
      // comment
      continue;
    }
    if (rule.includes("=")) {
      // group creation
      const decl = rule.split("=");
      if (decl.length != 2) {
        res.errors.push("Invalid group declaration: " + rule);
      } else if (decl[0].length != 1) {
        res.errors.push("Invalid group name: " + decl[0]);
      } else {
        groups[decl[0]] = mkGroup(decl[1]);
      }
      continue;
    }
    if (!rule.includes("/")) {
      res.errors.push("Invalid rule: " + rule);
      continue;
    }

    // sound change
    const parts = rule.split("/");
    if (parts.length < 2 || parts.length > 4) {
      // rule must have two to four parts: target, replacement, context, exception context
      res.errors.push("Invalid sound change declaration: " + rule);
      continue;
    }
    // no context / exception context given: "_" alone places no restriction
    let context = parts.length > 2 ? parts[2] : "_";
    let exContext = parts.length > 3 ? parts[3] : "_";
    if (!context.includes("_")) {
      res.errors.push("Context does not include an underscore: " + context);
      context = "_";
    }
    if (!exContext.includes("_")) {
      res.errors.push("Exception context does not include an underscore: " + exContext);
      exContext = "_";
    }

    const target = mkChangePart(parts[0], groups, false);
    // target must not be empty
    if (target[0].length == 0) {
      res.errors.push("Target cannot be left empty: " + rule);
      continue;
    }
    // target must have at least one required segment
    if (target[0].every(segment => segment.optional)) {
      res.errors.push("Target must have at least one required segment: " + rule);
      continue;
    }
    const replace = mkChangePart(parts[1], groups, false);
    const ctx = mkChangePart(context, groups, true);
    const ex_ctx = mkChangePart(exContext, groups, true);
    res.errors = res.errors.concat(target[1]).concat(replace[1]).concat(ctx[1]).concat(ex_ctx[1]);
    res.changes.push({find: target[0], replace: replace[0], context: ctx[0], ex_context: ex_ctx[0]});
  }
  return res;
}
/*
 * Make one part of the sound change, either the target, replacement, or context.
 * isCtx is only set true for the context.
 *
 * str:    text of the part.
 * groups: map from one-character group name to its array of options.
 * isCtx:  when true, "_" is stored as the string "_" and "#" as {type: "boundary"}, and groups without an
 *         explicit index are stored without an index. Otherwise such groups are numbered "0", "1", ... in
 *         order of appearance.
 *
 * Returns [part, errors]. part is the array of segments: {type: "char", name}, {type: "group", elements, index},
 * {type: "wildcard"} for "*", plus the context-only entries above. "?" sets optional = true on the segment before
 * it. Explicit indices are stored with a "_" prefix so they cannot collide with the automatic ones. Parsing
 * stops at the first piece of text that cannot be understood; part then holds whatever was parsed so far.
 */
function mkChangePart(str, groups, isCtx) {
  // Only names declared by the user count as groups, never properties inherited by the map object
  const isGroup = (name) => Object.prototype.hasOwnProperty.call(groups, name);

  let remainder = str;
  let nextidx = 0;
  const part = [];
  const errors = [];

  // Append a group segment; explicitIdx is the digit string written after the group ("" if none)
  const pushGroup = (elements, explicitIdx) => {
    if (explicitIdx.length > 0) {
      part.push({type: "group", elements: elements, index: "_" + explicitIdx});
    } else if (!isCtx) {
      part.push({type: "group", elements: elements, index: "" + nextidx});
      nextidx += 1;
    } else {
      part.push({type: "group", elements: elements});
    }
  };

  while (remainder.length > 0) {
    const head = remainder[0];
    // Special elements only in the context
    if (isCtx && head == "_") {
      part.push("_");
      remainder = remainder.slice(1);
      continue;
    }
    if (isCtx && head == "#") {
      part.push({type: "boundary"});
      remainder = remainder.slice(1);
      continue;
    }
    // single-character operators
    if (head == "?") {
      // ? operator: mark the last segment as optional
      const last = part[part.length - 1];
      if (part.length == 0) {
        // No "continue": "?" matches neither pattern below, so parsing ends there
        // with an "Invalid rule part" error as well.
        errors.push("Question mark must follow another segent: " + str);
      } else if (typeof last == "string") {
        // The previous entry is the "_" marker, which cannot be optional
        errors.push("Question mark cannot follow the underscore: " + str);
        remainder = remainder.slice(1);
        continue;
      } else {
        last.optional = true;
        remainder = remainder.slice(1);
        continue;
      }
    } else if (head == "*") {
      // * operator: match any character
      part.push({type: "wildcard"});
      remainder = remainder.slice(1);
      continue;
    }

    // Try to match either a character or a group
    const segmatch = remainder.match(nextSegment);
    if (segmatch != null && segmatch.index == 0) {
      const seg = segmatch[0];
      remainder = remainder.slice(seg.length);
      const name = seg[0];
      const idx = seg.slice(1);
      if (idx.length > 0 && !isGroup(name)) {
        // Segment has an index, which only makes sense for groups
        errors.push("Index given for character instead of group: " + str);
      } else if (isGroup(name)) {
        pushGroup(groups[name], idx);
      } else {
        part.push({type: "char", name: name});
      }
      continue;
    }

    // Try matching a nonce group
    const noncematch = remainder.match(nextNonce);
    if (noncematch == null || noncematch.index != 0) {
      errors.push("Invalid rule part: " + str);
      return [part, errors];
    }
    remainder = remainder.slice(noncematch[0].length);
    // "{a,b}12" -> ["a,b", "12"]
    const pieces = noncematch[0].slice(1).split("}");
    // Allow nesting named groups within nonce groups
    let elements = [];
    for (const element of mkGroup(pieces[0])) {
      if (isGroup(element)) {
        elements = elements.concat(groups[element]);
      } else {
        elements.push(element);
      }
    }
    pushGroup(elements, pieces[1]);
  }
  return [part, errors];
}
/*
 * Turn the text of a group into the list of its options.
 *
 * If str contains a comma, the options are the comma-separated pieces (which may be several characters long).
 * Otherwise every character of str is one option; the text is split by code point, so a character outside the
 * Basic Multilingual Plane stays in one piece instead of being cut into two surrogate halves.
 *
 * Empty options and repeated options are dropped; the first occurrence of each option keeps its place.
 */
function mkGroup(str) {
  const pieces = str.includes(",") ? str.split(",") : Array.from(str);
  // A Set keeps insertion order, so this de-duplicates without reordering
  return [...new Set(pieces)].filter((el) => el.length > 0);
}
/*
 * Escape a string for insertion into HTML by building an Option object from the text and
 * reading its innerHTML back.
 */
function escapeHTML(str) {
  const scratch = new Option(str);
  return scratch.innerHTML;
}
/*
 * Return str with a backslash in front of every character that has a special meaning in a
 * regular expression.
 */
function escapeRegExp(str) {
  const specialChars = /[.*+?^${}()|[\]\\]/g;
  const escaped = str.replace(specialChars, '\\$&');
  return escaped;
}
/*
 * Print s to the console, but only while the global debug flag is set.
 */
function log(s) {
  if (!debug) {
    return;
  }
  console.log(s);
}
/*
 * Print a "before --> after" line to the console, but only while the global logChanges flag
 * is set.
 */
function logChange(before, after) {
  if (!logChanges) {
    return;
  }
  const line = before + " --> " + after;
  console.log(line);
}