trie.ts 8.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350
  1. /**
  2. * Suffix Trie based on Mnemonist Trie
  3. */
  4. // import { Trie } from 'mnemonist';
  5. export const SENTINEL = Symbol('SENTINEL');
  6. type TrieNode = {
  7. [SENTINEL]: boolean,
  8. [Bun.inspect.custom]: () => string
  9. } & Map<string, TrieNode>;
  10. const deepTrieNodeToJSON = (node: TrieNode) => {
  11. const obj: Record<string, any> = {};
  12. if (node[SENTINEL]) {
  13. obj['[start]'] = node[SENTINEL];
  14. }
  15. node.forEach((value, key) => {
  16. obj[key] = deepTrieNodeToJSON(value);
  17. });
  18. return obj;
  19. };
  20. function trieNodeInspectCustom(this: TrieNode) {
  21. return JSON.stringify(deepTrieNodeToJSON(this), null, 2);
  22. }
  23. const createNode = (): TrieNode => {
  24. const node = new Map<string, TrieNode>() as TrieNode;
  25. node[SENTINEL] = false;
  26. node[Bun.inspect.custom] = trieNodeInspectCustom;
  27. return node;
  28. };
  29. export const createTrie = (from?: string[] | Set<string> | null, hostnameMode = false) => {
  30. let size = 0;
  31. const root: TrieNode = createNode();
  32. const suffixToTokens = hostnameMode
  33. ? (suffix: string) => {
  34. let buf = '';
  35. const tokens: string[] = [];
  36. for (let i = 0, l = suffix.length; i < l; i++) {
  37. const c = suffix[i];
  38. if (c === '.') {
  39. if (buf) {
  40. tokens.push(buf, /* . */ c);
  41. buf = '';
  42. } else {
  43. tokens.push(/* . */ c);
  44. }
  45. } else {
  46. buf += c;
  47. }
  48. }
  49. if (buf) {
  50. tokens.push(buf);
  51. }
  52. return tokens;
  53. }
  54. : (suffix: string) => suffix;
  55. /**
  56. * Method used to add the given prefix to the trie.
  57. */
  58. const add = (suffix: string): void => {
  59. let node: TrieNode = root;
  60. let token: string;
  61. const tokens = suffixToTokens(suffix);
  62. for (let i = tokens.length - 1; i >= 0; i--) {
  63. token = tokens[i];
  64. if (node.has(token)) {
  65. node = node.get(token)!;
  66. } else {
  67. const newNode = createNode();
  68. node.set(token, newNode);
  69. node = newNode;
  70. }
  71. }
  72. // Do we need to increase size?
  73. if (!node[SENTINEL]) {
  74. size++;
  75. }
  76. node[SENTINEL] = true;
  77. };
  78. /**
  79. * @param {string} $suffix
  80. */
  81. const contains = (suffix: string): boolean => {
  82. let node: TrieNode | undefined = root;
  83. let token: string;
  84. const tokens = suffixToTokens(suffix);
  85. for (let i = tokens.length - 1; i >= 0; i--) {
  86. token = tokens[i];
  87. node = node.get(token);
  88. if (!node) return false;
  89. }
  90. return true;
  91. };
  92. /**
  93. * Method used to retrieve every item in the trie with the given prefix.
  94. */
  95. const find = (inputSuffix: string, /** @default true */ includeEqualWithSuffix = true): string[] => {
  96. let node: TrieNode | undefined = root;
  97. let token: string;
  98. const inputTokens = suffixToTokens(inputSuffix);
  99. for (let i = inputTokens.length - 1; i >= 0; i--) {
  100. token = inputTokens[i];
  101. if (hostnameMode && token === '') {
  102. break;
  103. }
  104. node = node.get(token);
  105. if (!node) return [];
  106. }
  107. const matches: Array<string | string[]> = [];
  108. // Performing DFS from prefix
  109. const nodeStack: TrieNode[] = [node];
  110. const suffixStack: Array<string | string[]> = [inputTokens];
  111. do {
  112. const suffix: string | string[] = suffixStack.pop()!;
  113. node = nodeStack.pop()!;
  114. if (node[SENTINEL]) {
  115. if (includeEqualWithSuffix) {
  116. matches.push(suffix);
  117. } else if (hostnameMode) {
  118. if ((suffix as string[]).some((t, i) => t !== inputTokens[i])) {
  119. matches.push(suffix);
  120. }
  121. } else if (suffix !== inputTokens) {
  122. matches.push(suffix);
  123. }
  124. }
  125. node.forEach((childNode, k) => {
  126. nodeStack.push(childNode);
  127. if (hostnameMode) {
  128. const stack = (suffix as string[]).slice();
  129. stack.unshift(k);
  130. suffixStack.push(stack);
  131. } else {
  132. suffixStack.push(k + (suffix as string));
  133. }
  134. });
  135. } while (nodeStack.length);
  136. return hostnameMode ? matches.map((m) => (m as string[]).join('')) : matches as string[];
  137. };
  138. /**
  139. * Works like trie.find, but instead of returning the matches as an array, it removes them from the given set in-place.
  140. */
  141. const substractSetInPlaceFromFound = (inputSuffix: string, set: Set<string>) => {
  142. let node: TrieNode | undefined = root;
  143. let token: string;
  144. const inputTokens = suffixToTokens(inputSuffix);
  145. // Find the leaf-est node, and early return if not any
  146. for (let i = inputTokens.length - 1; i >= 0; i--) {
  147. token = inputTokens[i];
  148. node = node.get(token);
  149. if (!node) return;
  150. }
  151. // Performing DFS from prefix
  152. const nodeStack: TrieNode[] = [node];
  153. const suffixStack: Array<string | string[]> = [inputTokens];
  154. do {
  155. const suffix = suffixStack.pop()!;
  156. node = nodeStack.pop()!;
  157. if (node[SENTINEL]) {
  158. if (suffix !== inputTokens) {
  159. // found match, delete it from set
  160. if (hostnameMode) {
  161. set.delete((suffix as string[]).join(''));
  162. } else {
  163. set.delete(suffix as string);
  164. }
  165. }
  166. }
  167. node.forEach((childNode, k) => {
  168. nodeStack.push(childNode);
  169. if (hostnameMode) {
  170. const stack = (suffix as string[]).slice();
  171. stack.unshift(k);
  172. suffixStack.push(stack);
  173. } else {
  174. suffixStack.push(k + (suffix as string));
  175. }
  176. });
  177. } while (nodeStack.length);
  178. };
  179. /**
  180. * Method used to delete a prefix from the trie.
  181. */
  182. const remove = (suffix: string): boolean => {
  183. let node: TrieNode | undefined = root;
  184. let toPrune: TrieNode | null = null;
  185. let tokenToPrune: string | null = null;
  186. let parent: TrieNode = node;
  187. let token: string;
  188. const suffixTokens = suffixToTokens(suffix);
  189. for (let i = suffixTokens.length - 1; i >= 0; i--) {
  190. token = suffixTokens[i];
  191. parent = node;
  192. node = node.get(token);
  193. if (!node) {
  194. return false;
  195. }
  196. // Keeping track of a potential branch to prune
  197. // If the node is to be pruned, but they are more than one token child in it, we can't prune it
  198. // If there is only one token child, or no child at all, we can prune it safely
  199. const onlyChild = node.size === 1 && node.has(token);
  200. if (onlyChild) {
  201. toPrune = parent;
  202. tokenToPrune = token;
  203. } else if (toPrune !== null) { // not only child, retain the branch
  204. toPrune = null;
  205. tokenToPrune = null;
  206. }
  207. }
  208. if (!node[SENTINEL]) return false;
  209. size--;
  210. if (tokenToPrune && toPrune) {
  211. toPrune.delete(tokenToPrune);
  212. } else {
  213. node[SENTINEL] = false;
  214. }
  215. return true;
  216. };
  217. /**
  218. * Method used to assert whether the given prefix exists in the Trie.
  219. */
  220. const has = (suffix: string): boolean => {
  221. let node: TrieNode = root;
  222. const tokens = suffixToTokens(suffix);
  223. for (let i = tokens.length - 1; i >= 0; i--) {
  224. const token = tokens[i];
  225. if (!node.has(token)) {
  226. return false;
  227. }
  228. node = node.get(token)!;
  229. }
  230. return node[SENTINEL];
  231. };
  232. if (Array.isArray(from)) {
  233. for (let i = 0, l = from.length; i < l; i++) {
  234. add(from[i]);
  235. }
  236. } else if (from) {
  237. from.forEach(add);
  238. }
  239. const dump = () => {
  240. const node = root;
  241. const nodeStack: TrieNode[] = [];
  242. const suffixStack: string[] = [];
  243. // Resolving initial string
  244. const suffix = '';
  245. nodeStack.push(node);
  246. suffixStack.push(suffix);
  247. const results: string[] = [];
  248. let currentNode: TrieNode;
  249. let currentPrefix: string;
  250. let hasValue = false;
  251. do {
  252. currentNode = nodeStack.pop()!;
  253. currentPrefix = suffixStack.pop()!;
  254. if (currentNode[SENTINEL]) {
  255. hasValue = true;
  256. }
  257. node.forEach((childNode, k) => {
  258. nodeStack.push(childNode);
  259. suffixStack.push(k + suffix);
  260. });
  261. if (hasValue) results.push(currentPrefix);
  262. } while (nodeStack.length);
  263. return results;
  264. };
  265. return {
  266. add,
  267. contains,
  268. find,
  269. substractSetInPlaceFromFound,
  270. remove,
  271. delete: remove,
  272. has,
  273. dump,
  274. get size() {
  275. return size;
  276. },
  277. get root() {
  278. return root;
  279. },
  280. [Bun.inspect.custom]: () => JSON.stringify(deepTrieNodeToJSON(root), null, 2)
  281. };
  282. };
  283. export default createTrie;