Skip to content

Commit

Permalink
Perf/Refactor: trie w/ hostname mode
Browse files Browse the repository at this point in the history
  • Loading branch information
SukkaW committed May 10, 2024
1 parent 487d4fe commit 59b86f7
Show file tree
Hide file tree
Showing 2 changed files with 106 additions and 35 deletions.
15 changes: 11 additions & 4 deletions Build/lib/trie.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,8 @@ describe('Trie', () => {
trie.add('sesqueroman');
trie.add('greek');

console.log({ trie });

expect(trie.find('roman')).toEqual(['roman', 'esqueroman', 'sesqueroman']);
expect(trie.find('man')).toEqual(['roman', 'esqueroman', 'sesqueroman']);
expect(trie.find('esqueroman')).toEqual(['esqueroman', 'sesqueroman']);
Expand All @@ -97,16 +99,21 @@ describe('Trie', () => {
});
});

describe('surge domainset dedupe', () => {
describe.each([
['hostname mode off', false],
['hostname mode on', true]
])('surge domainset dedupe %s', (_, hostnameMode) => {
it('should not remove same entry', () => {
const trie = createTrie(['.skk.moe', 'noc.one']);
const trie = createTrie(['.skk.moe', 'noc.one'], hostnameMode);

console.log(trie);

expect(trie.find('.skk.moe')).toStrictEqual(['.skk.moe']);
expect(trie.find('noc.one')).toStrictEqual(['noc.one']);
});

it('should remove subdomain', () => {
const trie = createTrie(['www.noc.one', 'www.sukkaw.com', 'blog.skk.moe', 'image.cdn.skk.moe', 'cdn.sukkaw.net']);
const trie = createTrie(['www.noc.one', 'www.sukkaw.com', 'blog.skk.moe', 'image.cdn.skk.moe', 'cdn.sukkaw.net'], hostnameMode);

console.log(trie);

Expand All @@ -115,7 +122,7 @@ describe('surge domainset dedupe', () => {
});

it('should not remove non-subdomain', () => {
const trie = createTrie(['skk.moe', 'sukkaskk.moe']);
const trie = createTrie(['skk.moe', 'sukkaskk.moe'], hostnameMode);
expect(trie.find('.skk.moe')).toStrictEqual([]);
});
});
126 changes: 95 additions & 31 deletions Build/lib/trie.ts
Original file line number Diff line number Diff line change
Expand Up @@ -33,19 +33,45 @@ const createNode = (): TrieNode => {
return node;
};

export const createTrie = (from?: string[] | Set<string> | null) => {
export const createTrie = (from?: string[] | Set<string> | null, hostnameMode = false) => {
let size = 0;
const root: TrieNode = createNode();

// Tokenizer shared by the trie operations. The strategy is picked once from `hostnameMode`:
// - hostname mode: the input is cut into dot-separated labels and every '.' is kept as a
//   token of its own (e.g. 'blog.skk.moe' -> ['blog', '.', 'skk', '.', 'moe']);
//   empty labels (leading, trailing or consecutive dots) are never emitted.
// - otherwise: the string is handed back untouched.
const suffixToTokens = hostnameMode
  ? (suffix: string) => {
    const tokens: string[] = [];
    const end = suffix.length;
    // Index where the label currently being scanned begins.
    let labelStart = 0;

    // Run one step past the last character so the trailing label is flushed by the same code path.
    for (let pos = 0; pos <= end; pos++) {
      const atEnd = pos === end;
      if (!atEnd && suffix[pos] !== '.') {
        continue;
      }
      if (pos > labelStart) {
        tokens.push(suffix.slice(labelStart, pos));
      }
      if (!atEnd) {
        tokens.push('.');
      }
      labelStart = pos + 1;
    }

    return tokens;
  }
  : (suffix: string) => suffix;

/**
* Method used to add the given suffix to the trie.
*/
const add = (suffix: string): void => {
let node: TrieNode = root;
let token: string;

for (let i = suffix.length - 1; i >= 0; i--) {
token = suffix[i];
const tokens = suffixToTokens(suffix);

for (let i = tokens.length - 1; i >= 0; i--) {
token = tokens[i];

if (node.has(token)) {
node = node.get(token)!;
Expand All @@ -64,14 +90,16 @@ export const createTrie = (from?: string[] | Set<string> | null) => {
};

/**
* @param {string} suffix
* @param {string} $suffix
*/
const contains = (suffix: string): boolean => {
let node: TrieNode | undefined = root;
let token: string;

for (let i = suffix.length - 1; i >= 0; i--) {
token = suffix[i];
const tokens = suffixToTokens(suffix);

for (let i = tokens.length - 1; i >= 0; i--) {
token = tokens[i];

node = node.get(token);
if (!node) return false;
Expand All @@ -86,87 +114,121 @@ export const createTrie = (from?: string[] | Set<string> | null) => {
let node: TrieNode | undefined = root;
let token: string;

for (let i = inputSuffix.length - 1; i >= 0; i--) {
token = inputSuffix[i];
const inputTokens = suffixToTokens(inputSuffix);

for (let i = inputTokens.length - 1; i >= 0; i--) {
token = inputTokens[i];

if (hostnameMode && token === '') {
break;
}

node = node.get(token);
if (!node) return [];
}

const matches: string[] = [];
const matches: Array<string | string[]> = [];

// Performing DFS from prefix
const nodeStack: TrieNode[] = [node];
const suffixStack: string[] = [inputSuffix];
const suffixStack: Array<string | string[]> = [inputTokens];

do {
const suffix: string = suffixStack.pop()!;
const suffix: string | string[] = suffixStack.pop()!;
node = nodeStack.pop()!;

if (node[SENTINEL]) {
if (includeEqualWithSuffix || suffix !== inputSuffix) {
if (includeEqualWithSuffix) {
matches.push(suffix);
} else if (hostnameMode) {
if ((suffix as string[]).some((t, i) => t !== inputTokens[i])) {
matches.push(suffix);
}
} else if (suffix !== inputTokens) {
matches.push(suffix);
}
}

node.forEach((childNode, k) => {
nodeStack.push(childNode);
suffixStack.push(k + suffix);

if (hostnameMode) {
const stack = (suffix as string[]).slice();
stack.unshift(k);

suffixStack.push(stack);
} else {
suffixStack.push(k + (suffix as string));
}
});
} while (nodeStack.length);

return matches;
return hostnameMode ? matches.map((m) => (m as string[]).join('')) : matches as string[];
};

/**
* Works like trie.find, but instead of returning the matches as an array, it removes them from the given set in-place.
*/
* Works like trie.find, but instead of returning the matches as an array, it removes them from the given set in-place.
*/
const substractSetInPlaceFromFound = (inputSuffix: string, set: Set<string>) => {
let node: TrieNode | undefined = root;
let token: string;

const inputTokens = suffixToTokens(inputSuffix);

// Find the leaf-est node, and early return if not any
for (let i = inputSuffix.length - 1; i >= 0; i--) {
token = inputSuffix[i];
for (let i = inputTokens.length - 1; i >= 0; i--) {
token = inputTokens[i];

node = node.get(token);
if (!node) return;
}

// Performing DFS from prefix
const nodeStack: TrieNode[] = [node];
const suffixStack: string[] = [inputSuffix];
const suffixStack: Array<string | string[]> = [inputTokens];

do {
const suffix = suffixStack.pop()!;
node = nodeStack.pop()!;

if (node[SENTINEL]) {
if (suffix !== inputSuffix) {
// found match, delete it from set
set.delete(suffix);
if (suffix !== inputTokens) {
// found match, delete it from set
if (hostnameMode) {
set.delete((suffix as string[]).join(''));
} else {
set.delete(suffix as string);
}
}
}

node.forEach((childNode, k) => {
nodeStack.push(childNode);
suffixStack.push(k + suffix);
if (hostnameMode) {
const stack = (suffix as string[]).slice();
stack.unshift(k);
suffixStack.push(stack);
} else {
suffixStack.push(k + (suffix as string));
}
});
} while (nodeStack.length);
};

/**
* Method used to delete a prefix from the trie.
*/
* Method used to delete a prefix from the trie.
*/
const remove = (suffix: string): boolean => {
let node: TrieNode | undefined = root;
let toPrune: TrieNode | null = null;
let tokenToPrune: string | null = null;
let parent: TrieNode = node;
let token: string;

for (let i = suffix.length - 1; i >= 0; i--) {
token = suffix[i];
const suffixTokens = suffixToTokens(suffix);

for (let i = suffixTokens.length - 1; i >= 0; i--) {
token = suffixTokens[i];
parent = node;

node = node.get(token);
Expand Down Expand Up @@ -203,13 +265,15 @@ export const createTrie = (from?: string[] | Set<string> | null) => {
};

/**
* Method used to assert whether the given prefix exists in the Trie.
*/
* Method used to assert whether the given prefix exists in the Trie.
*/
const has = (suffix: string): boolean => {
let node: TrieNode = root;

for (let i = suffix.length - 1; i >= 0; i--) {
const token = suffix[i];
const tokens = suffixToTokens(suffix);

for (let i = tokens.length - 1; i >= 0; i--) {
const token = tokens[i];

if (!node.has(token)) {
return false;
Expand Down

0 comments on commit 59b86f7

Please sign in to comment.