Skip to content

Commit

Permalink
Finish parseCantoneseReadings function and add tests
Browse files Browse the repository at this point in the history
  • Loading branch information
MarvNC committed Jan 20, 2024
1 parent 714ac73 commit 8d5d702
Show file tree
Hide file tree
Showing 2 changed files with 139 additions and 4 deletions.
56 changes: 56 additions & 0 deletions src/test/parseCantoneseReadings.test.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
import test from 'ava';

import { parseCantoneseReadings } from '../util/parseCantoneseReadings.js';

/**
* @typedef {Object} TestCase
* @property {string} text
* @property {string} reading
* @property {ReturnType<typeof parseCantoneseReadings>} expected
*/

/**
* Fixtures for parseCantoneseReadings: each entry gives the input text, the
* input reading string, and the text/reading pairs the parser should return.
* @type {TestCase[]}
*/
const testCases = [
// Text made only of Chinese characters: one reading per character.
{
text: '福州',
reading: 'fuk1 zau1',
expected: [
{ text: '福', reading: 'fuk1' },
{ text: '州', reading: 'zau1' },
],
},
// Text starting with a Latin run: 'bu' stays together as a single token.
{
text: 'bu你阿麼',
reading: 'bu4 ni5 aa3 mo1',
expected: [
{ text: 'bu', reading: 'bu4' },
{ text: '你', reading: 'ni5' },
{ text: '阿', reading: 'aa3' },
{ text: '麼', reading: 'mo1' },
],
},
// Latin words embedded between Chinese characters, plus trailing punctuation
// that is expected to be paired with the same punctuation in the reading.
{
text: '你get唔get到我講咩?',
reading: 'nei5 get1 m4 get1 dou2 ngo5 gong2 me1?',
expected: [
{ text: '你', reading: 'nei5' },
{ text: 'get', reading: 'get1' },
{ text: '唔', reading: 'm4' },
{ text: 'get', reading: 'get1' },
{ text: '到', reading: 'dou2' },
{ text: '我', reading: 'ngo5' },
{ text: '講', reading: 'gong2' },
{ text: '咩', reading: 'me1' },
{ text: '?', reading: '?' },
],
},
];

// Register one test per fixture; the title includes both inputs so a failing
// case can be identified from the report.
testCases.forEach(({ text, reading, expected }) => {
  test(`parseCantoneseReadings: ${text} ${reading}`, (t) => {
    t.deepEqual(parseCantoneseReadings(text, reading), expected);
  });
});
87 changes: 83 additions & 4 deletions src/util/parseCantoneseReadings.js
Original file line number Diff line number Diff line change
@@ -1,12 +1,91 @@
/**
 * Pairs each token of a text with the corresponding token of its readings.
 *
 * Both strings are tokenized with `splitString` and the resulting tokens are
 * matched up by position, so the n-th text token receives the n-th reading.
 *
 * @example
 * parseCantoneseReadings(
 *   '你get唔get到我講咩?',
 *   'nei5 get1 m4 get1 dou2 ngo5 gong2 me1?',
 * );
 * // => [{text: "你", reading: "nei5"}, {text: "get", reading: "get1"}, ...]
 * @param {string} text Text to annotate.
 * @param {string} readings Readings for `text`, listed in the same order.
 * @returns {{text: string, reading: string}[]} One entry per token of `text`.
 * @throws {Error} If `text` and `readings` split into a different number of
 *   tokens.
 */
function parseCantoneseReadings(text, readings) {
  // Full-width and ASCII forms of common punctuation marks, each listed once.
  const punctuations = [
    '，',
    ',',
    '。',
    '.',
    '？',
    '?',
    '！',
    '!',
    '；',
    ';',
    '：',
    ':',
    '、',
  ];

  const textArray = splitString(text, punctuations);
  const readingsArray = splitString(readings, punctuations);

  // Positional pairing is only meaningful when both sides have the same
  // number of tokens.
  if (textArray.length !== readingsArray.length) {
    throw new Error('Text and readings do not match');
  }

  return textArray.map((token, index) => ({
    text: token,
    reading: readingsArray[index],
  }));
}

/**
 * Splits a string into tokens so text and readings can be aligned.
 *
 * Consecutive ASCII letters and digits are kept together as one token (for
 * example an embedded English word, or a romanized reading such as "nei5").
 * Every other character becomes a token of its own, except whitespace, which
 * only separates tokens and is never returned.
 *
 * @param {string} input String to tokenize.
 * @param {string[]} punctuations Punctuation marks. Kept for interface
 *   compatibility but not consulted: punctuation is tokenized exactly like
 *   any other non-alphanumeric character.
 * @returns {string[]} Tokens in their original order, with no empty entries.
 */
function splitString(input, punctuations) {
  const tokens = [];
  let run = '';

  // Emits the pending alphanumeric run, if any, as a single token.
  const flushRun = () => {
    if (run) {
      tokens.push(run);
      run = '';
    }
  };

  // for...of walks code points, so characters outside the BMP stay intact.
  for (const char of input) {
    if (/[a-zA-Z0-9]/.test(char)) {
      run += char;
      continue;
    }

    flushRun();
    // Whitespace trims to an empty string: it ends a run but is not a token.
    if (char.trim()) {
      tokens.push(char);
    }
  }

  // Emit a run that reaches the end of the input.
  flushRun();
  return tokens;
}

export { parseCantoneseReadings };

0 comments on commit 8d5d702

Please sign in to comment.