|
|
@@ -2,11 +2,13 @@
|
|
|
* Suffix Trie based on Mnemonist Trie
|
|
|
*/
|
|
|
|
|
|
-// import { Trie } from 'mnemonist';
|
|
|
+// const { Error, Bun, JSON, Symbol } = globalThis;
|
|
|
|
|
|
-export const SENTINEL = Symbol('SENTINEL');
|
|
|
+const SENTINEL = Symbol('SENTINEL');
|
|
|
const PARENT = Symbol('Parent Node');
|
|
|
|
|
|
+const noop: VoidFunction = () => { /** noop */ };
|
|
|
+
|
|
|
type TrieNode = {
|
|
|
[SENTINEL]: boolean,
|
|
|
[PARENT]: TrieNode | null,
|
|
|
@@ -69,7 +71,7 @@ export const createTrie = (from?: string[] | Set<string> | null, hostnameMode =
|
|
|
: (suffix: string) => suffix;
|
|
|
|
|
|
/**
|
|
|
- * Method used to add the given prefix to the trie.
|
|
|
+ * Method used to add the given suffix to the trie.
|
|
|
*/
|
|
|
const add = (suffix: string): void => {
|
|
|
let node: TrieNode = root;
|
|
|
@@ -85,9 +87,7 @@ export const createTrie = (from?: string[] | Set<string> | null, hostnameMode =
|
|
|
|
|
|
// During the adding of `[start]blog.skk.moe` and find out that there is a `[start].skk.moe` in the trie
|
|
|
// Dedupe the covered subdomain by skipping
|
|
|
- if (smolTree && (node.get('.')?.[SENTINEL])) {
|
|
|
- return;
|
|
|
- }
|
|
|
+ if (smolTree && (node.get('.')?.[SENTINEL])) return;
|
|
|
} else {
|
|
|
const newNode = createNode(node);
|
|
|
node.set(token, newNode);
|
|
|
@@ -129,155 +129,78 @@ export const createTrie = (from?: string[] | Set<string> | null, hostnameMode =
|
|
|
node[SENTINEL] = true;
|
|
|
};
|
|
|
|
|
|
- /**
|
|
|
- * @param {string} $suffix
|
|
|
- */
|
|
|
- const contains = (suffix: string): boolean => {
|
|
|
+ const walkIntoLeafWithTokens = (
|
|
|
+ tokens: string | string[],
|
|
|
+ onLoop: (node: TrieNode, parent: TrieNode, token: string) => void = noop
|
|
|
+ ) => {
|
|
|
let node: TrieNode | undefined = root;
|
|
|
- let token: string;
|
|
|
+ let parent: TrieNode = node;
|
|
|
|
|
|
- const tokens = suffixToTokens(suffix);
|
|
|
+ let token: string;
|
|
|
|
|
|
for (let i = tokens.length - 1; i >= 0; i--) {
|
|
|
token = tokens[i];
|
|
|
|
|
|
- node = node.get(token);
|
|
|
- if (!node) return false;
|
|
|
- }
|
|
|
-
|
|
|
- return true;
|
|
|
- };
|
|
|
-
|
|
|
- /**
|
|
|
- * Method used to retrieve every item in the trie with the given prefix.
|
|
|
- */
|
|
|
- const find = (inputSuffix: string, /** @default true */ includeEqualWithSuffix = true): string[] => {
|
|
|
- if (smolTree) {
|
|
|
- throw new Error('A Trie with smolTree enabled cannot perform find!');
|
|
|
- }
|
|
|
-
|
|
|
- let node: TrieNode | undefined = root;
|
|
|
- let token: string;
|
|
|
-
|
|
|
- const inputTokens = suffixToTokens(inputSuffix);
|
|
|
-
|
|
|
- for (let i = inputTokens.length - 1; i >= 0; i--) {
|
|
|
- token = inputTokens[i];
|
|
|
-
|
|
|
if (hostnameMode && token === '') {
|
|
|
break;
|
|
|
}
|
|
|
|
|
|
+ parent = node;
|
|
|
node = node.get(token);
|
|
|
- if (!node) return [];
|
|
|
+ if (!node) return null;
|
|
|
+
|
|
|
+ onLoop(node, parent, token);
|
|
|
}
|
|
|
|
|
|
- const matches: Array<string | string[]> = [];
|
|
|
+ return { node, parent };
|
|
|
+ };
|
|
|
+
|
|
|
+ const contains = (suffix: string): boolean => {
|
|
|
+ const tokens = suffixToTokens(suffix);
|
|
|
+ return walkIntoLeafWithTokens(tokens) !== null;
|
|
|
+ };
|
|
|
|
|
|
- // Performing DFS from prefix
|
|
|
- const nodeStack: TrieNode[] = [node];
|
|
|
- const suffixStack: Array<string | string[]> = [inputTokens];
|
|
|
+ const walk = (
|
|
|
+ onMatches: (suffix: string | string[]) => void,
|
|
|
+ initialNode = root,
|
|
|
+ initialSuffix: string | string[] = hostnameMode ? [] : ''
|
|
|
+ ) => {
|
|
|
+ const nodeStack: TrieNode[] = [initialNode];
|
|
|
+ // Resolving initial string (begin the start of the stack)
|
|
|
+ const suffixStack: Array<string | string[]> = [initialSuffix];
|
|
|
+
|
|
|
+ let node: TrieNode = root;
|
|
|
|
|
|
do {
|
|
|
- const suffix: string | string[] = suffixStack.pop()!;
|
|
|
node = nodeStack.pop()!;
|
|
|
-
|
|
|
- if (node[SENTINEL]) {
|
|
|
- if (includeEqualWithSuffix) {
|
|
|
- matches.push(suffix);
|
|
|
- } else if (isHostnameMode(suffix)) {
|
|
|
- if (suffix.some((t, i) => t !== inputTokens[i])) {
|
|
|
- matches.push(suffix);
|
|
|
- }
|
|
|
- } else if (suffix !== inputTokens) {
|
|
|
- matches.push(suffix);
|
|
|
- }
|
|
|
- }
|
|
|
+ const suffix = suffixStack.pop()!;
|
|
|
|
|
|
node.forEach((childNode, k) => {
|
|
|
+ // Pushing the child node to the stack for next iteration of DFS
|
|
|
nodeStack.push(childNode);
|
|
|
|
|
|
- if (isHostnameMode(suffix)) {
|
|
|
- suffixStack.push([k, ...suffix]);
|
|
|
- } else {
|
|
|
- suffixStack.push(k + suffix);
|
|
|
- }
|
|
|
+ suffixStack.push(isHostnameMode(suffix) ? [k, ...suffix] : k + suffix);
|
|
|
});
|
|
|
- } while (nodeStack.length);
|
|
|
-
|
|
|
- return hostnameMode ? matches.map((m) => (m as string[]).join('')) : matches as string[];
|
|
|
- };
|
|
|
-
|
|
|
- /**
|
|
|
- * Works like trie.find, but instead of returning the matches as an array, it removes them from the given set in-place.
|
|
|
- */
|
|
|
- const substractSetInPlaceFromFound = (inputSuffix: string, set: Set<string>) => {
|
|
|
- if (smolTree) {
|
|
|
- throw new Error('A Trie with smolTree enabled cannot perform substractSetInPlaceFromFound!');
|
|
|
- }
|
|
|
-
|
|
|
- let node: TrieNode | undefined = root;
|
|
|
- let token: string;
|
|
|
-
|
|
|
- const inputTokens = suffixToTokens(inputSuffix);
|
|
|
-
|
|
|
- // Find the leaf-est node, and early return if not any
|
|
|
- for (let i = inputTokens.length - 1; i >= 0; i--) {
|
|
|
- token = inputTokens[i];
|
|
|
-
|
|
|
- node = node.get(token);
|
|
|
- if (!node) return;
|
|
|
- }
|
|
|
-
|
|
|
- // Performing DFS from prefix
|
|
|
- const nodeStack: TrieNode[] = [node];
|
|
|
- const suffixStack: Array<string | string[]> = [inputTokens];
|
|
|
-
|
|
|
- do {
|
|
|
- const suffix = suffixStack.pop()!;
|
|
|
- node = nodeStack.pop()!;
|
|
|
|
|
|
+ // If the node is a sentinel, we push the suffix to the results
|
|
|
if (node[SENTINEL]) {
|
|
|
- // found match, delete it from set
|
|
|
- if (isHostnameMode(suffix)) {
|
|
|
- set.delete(suffix.join(''));
|
|
|
- } else if (suffix !== inputTokens) {
|
|
|
- set.delete(suffix);
|
|
|
- }
|
|
|
+ onMatches(suffix);
|
|
|
}
|
|
|
-
|
|
|
- node.forEach((childNode, k) => {
|
|
|
- nodeStack.push(childNode);
|
|
|
- if (isHostnameMode(suffix)) {
|
|
|
- const stack = [k, ...suffix];
|
|
|
- suffixStack.push(stack);
|
|
|
- } else {
|
|
|
- suffixStack.push(k + suffix);
|
|
|
- }
|
|
|
- });
|
|
|
} while (nodeStack.length);
|
|
|
};
|
|
|
|
|
|
- /**
|
|
|
- * Method used to delete a prefix from the trie.
|
|
|
- */
|
|
|
- const remove = (suffix: string): boolean => {
|
|
|
- let node: TrieNode | undefined = root;
|
|
|
+ interface FindSingleChildLeafResult {
|
|
|
+ node: TrieNode,
|
|
|
+ toPrune: TrieNode | null,
|
|
|
+ tokenToPrune: string | null,
|
|
|
+ parent: TrieNode
|
|
|
+ }
|
|
|
+
|
|
|
+ const getSingleChildLeaf = (tokens: string | string[]): FindSingleChildLeafResult | null => {
|
|
|
let toPrune: TrieNode | null = null;
|
|
|
let tokenToPrune: string | null = null;
|
|
|
- let parent: TrieNode = node;
|
|
|
- let token: string;
|
|
|
-
|
|
|
- const suffixTokens = suffixToTokens(suffix);
|
|
|
-
|
|
|
- for (let i = suffixTokens.length - 1; i >= 0; i--) {
|
|
|
- token = suffixTokens[i];
|
|
|
-
|
|
|
- parent = node;
|
|
|
- node = node.get(token);
|
|
|
-
|
|
|
- if (!node) return false;
|
|
|
|
|
|
+ const onLoop = (node: TrieNode, parent: TrieNode, token: string) => {
|
|
|
// Keeping track of a potential branch to prune
|
|
|
|
|
|
// Even if the node size is 1, but the single child is ".", we should retain the branch
|
|
|
@@ -297,11 +220,89 @@ export const createTrie = (from?: string[] | Set<string> | null, hostnameMode =
|
|
|
toPrune = parent;
|
|
|
tokenToPrune = token;
|
|
|
}
|
|
|
+ };
|
|
|
+
|
|
|
+ const res = walkIntoLeafWithTokens(tokens, onLoop);
|
|
|
+
|
|
|
+ if (res === null) return null;
|
|
|
+ return { node: res.node, toPrune, tokenToPrune, parent: res.parent };
|
|
|
+ };
|
|
|
+
|
|
|
+ /**
|
|
|
+ * Method used to retrieve every item in the trie with the given prefix.
|
|
|
+ */
|
|
|
+ const find = (inputSuffix: string, /** @default true */ includeEqualWithSuffix = true): string[] => {
|
|
|
+ if (smolTree) {
|
|
|
+ throw new Error('A Trie with smolTree enabled cannot perform find!');
|
|
|
}
|
|
|
|
|
|
- if (!node[SENTINEL]) return false;
|
|
|
+ const inputTokens = suffixToTokens(inputSuffix);
|
|
|
+ const res = walkIntoLeafWithTokens(inputTokens);
|
|
|
+ if (res === null) return [];
|
|
|
+
|
|
|
+ const matches: Array<string | string[]> = [];
|
|
|
+
|
|
|
+ const onMatches = includeEqualWithSuffix
|
|
|
+ ? (suffix: string | string[]) => matches.push(suffix)
|
|
|
+ : (
|
|
|
+ hostnameMode
|
|
|
+ ? (suffix: string[]) => {
|
|
|
+ if (suffix.some((t, i) => t !== inputTokens[i])) {
|
|
|
+ matches.push(suffix);
|
|
|
+ }
|
|
|
+ }
|
|
|
+ : (suffix: string) => {
|
|
|
+ if (suffix !== inputTokens) {
|
|
|
+ matches.push(suffix);
|
|
|
+ }
|
|
|
+ }
|
|
|
+ );
|
|
|
+
|
|
|
+ walk(
|
|
|
+ onMatches as any,
|
|
|
+ res.node, // Performing DFS from prefix
|
|
|
+ inputTokens
|
|
|
+ );
|
|
|
+
|
|
|
+ return hostnameMode ? matches.map((m) => (m as string[]).join('')) : matches as string[];
|
|
|
+ };
|
|
|
+
|
|
|
+ /**
|
|
|
+ * Works like trie.find, but instead of returning the matches as an array, it removes them from the given set in-place.
|
|
|
+ */
|
|
|
+ const substractSetInPlaceFromFound = (inputSuffix: string, set: Set<string>) => {
|
|
|
+ if (smolTree) {
|
|
|
+ throw new Error('A Trie with smolTree enabled cannot perform substractSetInPlaceFromFound!');
|
|
|
+ }
|
|
|
+
|
|
|
+ const inputTokens = suffixToTokens(inputSuffix);
|
|
|
+
|
|
|
+ const res = walkIntoLeafWithTokens(inputTokens);
|
|
|
+ if (res === null) return;
|
|
|
+
|
|
|
+ const onMatches = hostnameMode
|
|
|
+ ? (suffix: string[]) => set.delete(suffix.join(''))
|
|
|
+ : (suffix: string) => set.delete(suffix);
|
|
|
+
|
|
|
+ walk(
|
|
|
+ onMatches as any,
|
|
|
+ res.node, // Performing DFS from prefix
|
|
|
+ inputTokens
|
|
|
+ );
|
|
|
+ };
|
|
|
+
|
|
|
+ /**
|
|
|
+ * Method used to delete a prefix from the trie.
|
|
|
+ */
|
|
|
+ const remove = (suffix: string): boolean => {
|
|
|
+ const suffixTokens = suffixToTokens(suffix);
|
|
|
+ const res = getSingleChildLeaf(suffixTokens);
|
|
|
+ if (res === null) return false;
|
|
|
+
|
|
|
+ if (!res.node[SENTINEL]) return false;
|
|
|
|
|
|
size--;
|
|
|
+ const { node, toPrune, tokenToPrune } = res;
|
|
|
|
|
|
if (tokenToPrune && toPrune) {
|
|
|
toPrune.delete(tokenToPrune);
|
|
|
@@ -316,51 +317,22 @@ export const createTrie = (from?: string[] | Set<string> | null, hostnameMode =
|
|
|
* Method used to assert whether the given prefix exists in the Trie.
|
|
|
*/
|
|
|
const has = (suffix: string): boolean => {
|
|
|
- let node: TrieNode = root;
|
|
|
-
|
|
|
const tokens = suffixToTokens(suffix);
|
|
|
+ const res = walkIntoLeafWithTokens(tokens);
|
|
|
|
|
|
- for (let i = tokens.length - 1; i >= 0; i--) {
|
|
|
- const token = tokens[i];
|
|
|
-
|
|
|
- if (!node.has(token)) {
|
|
|
- return false;
|
|
|
- }
|
|
|
-
|
|
|
- node = node.get(token)!;
|
|
|
- }
|
|
|
-
|
|
|
- return node[SENTINEL];
|
|
|
+ return res
|
|
|
+ ? res.node[SENTINEL]
|
|
|
+ : false;
|
|
|
};
|
|
|
|
|
|
const dump = () => {
|
|
|
- const nodeStack: TrieNode[] = [];
|
|
|
- const suffixStack: Array<string | string[]> = [];
|
|
|
-
|
|
|
- nodeStack.push(root);
|
|
|
- // Resolving initial string (begin the start of the stack)
|
|
|
- suffixStack.push(hostnameMode ? [] : '');
|
|
|
-
|
|
|
const results: string[] = [];
|
|
|
|
|
|
- let node: TrieNode;
|
|
|
-
|
|
|
- do {
|
|
|
- node = nodeStack.pop()!;
|
|
|
- const suffix = suffixStack.pop()!;
|
|
|
-
|
|
|
- node.forEach((childNode, k) => {
|
|
|
- // Pushing the child node to the stack for next iteration of DFS
|
|
|
- nodeStack.push(childNode);
|
|
|
-
|
|
|
- suffixStack.push(isHostnameMode(suffix) ? [k, ...suffix] : k + suffix);
|
|
|
- });
|
|
|
-
|
|
|
- // If the node is a sentinel, we push the suffix to the results
|
|
|
- if (node[SENTINEL]) {
|
|
|
- results.push(isHostnameMode(suffix) ? suffix.join('') : suffix);
|
|
|
- }
|
|
|
- } while (nodeStack.length);
|
|
|
+ walk(suffix => {
|
|
|
+ results.push(
|
|
|
+ isHostnameMode(suffix) ? suffix.join('') : suffix
|
|
|
+ );
|
|
|
+ });
|
|
|
|
|
|
return results;
|
|
|
};
|
|
|
@@ -370,44 +342,12 @@ export const createTrie = (from?: string[] | Set<string> | null, hostnameMode =
|
|
|
throw new Error('whitelist method is only available in hostname mode or smolTree mode.');
|
|
|
}
|
|
|
|
|
|
- let node: TrieNode | undefined = root;
|
|
|
-
|
|
|
- let toPrune: TrieNode | null = null;
|
|
|
- let tokenToPrune: string | null = null;
|
|
|
- let parent: TrieNode = node;
|
|
|
-
|
|
|
const tokens = suffixToTokens(suffix);
|
|
|
- let token: string;
|
|
|
+ const res = getSingleChildLeaf(tokens);
|
|
|
|
|
|
- for (let i = tokens.length - 1; i >= 0; i--) {
|
|
|
- token = tokens[i];
|
|
|
-
|
|
|
- parent = node;
|
|
|
- node = node.get(token);
|
|
|
-
|
|
|
- if (!node) return;
|
|
|
+ if (res === null) return;
|
|
|
|
|
|
- // Keeping track of a potential branch to prune
|
|
|
-
|
|
|
- // Even if the node size is 1, but the single child is ".", we should retain the branch
|
|
|
- // Since the "." could be special if it is the leaf-est node
|
|
|
- const onlyChild = node.size < 2 && !node.has('.');
|
|
|
- // const onlyChild = node.size < 2 && (!hostnameMode || !node.has('.'));
|
|
|
-
|
|
|
- if (toPrune !== null) { // the top-est branch that could potentially being pruned
|
|
|
- if (!onlyChild) {
|
|
|
- // The branch has moew than single child, retain the branch.
|
|
|
- // And we need to abort prune the parent, so we set it to null
|
|
|
- toPrune = null;
|
|
|
- tokenToPrune = null;
|
|
|
- }
|
|
|
- } else if (onlyChild) {
|
|
|
- // There is only one token child, or no child at all, we can prune it safely
|
|
|
- // It is now the top-est branch that could potentially being pruned
|
|
|
- toPrune = parent;
|
|
|
- tokenToPrune = token;
|
|
|
- }
|
|
|
- }
|
|
|
+ const { node, toPrune, tokenToPrune, parent } = res;
|
|
|
|
|
|
// Trying to whitelist `[start].sub.example.com` where there is already a `[start]blog.sub.example.com` in the trie
|
|
|
if (tokens[0] === '.') {
|
|
|
@@ -423,7 +363,8 @@ export const createTrie = (from?: string[] | Set<string> | null, hostnameMode =
|
|
|
dotNode[SENTINEL] = false;
|
|
|
}
|
|
|
|
|
|
- // if (!node[SENTINEL]) return;
|
|
|
+ // return early if not found
|
|
|
+ if (!node[SENTINEL]) return;
|
|
|
|
|
|
if (tokenToPrune && toPrune) {
|
|
|
toPrune.delete(tokenToPrune);
|
|
|
@@ -432,6 +373,7 @@ export const createTrie = (from?: string[] | Set<string> | null, hostnameMode =
|
|
|
}
|
|
|
};
|
|
|
|
|
|
+ // Actually build trie
|
|
|
if (Array.isArray(from)) {
|
|
|
for (let i = 0, l = from.length; i < l; i++) {
|
|
|
add(from[i]);
|