Browse Source

Simplify kwfilter

SukkaW 2 years ago
parent
commit
b7a11b55ed
2 changed files with 61 additions and 61 deletions
  1. Build/lib/aho-corasick.test.ts (+12 -0)
  2. Build/lib/aho-corasick.ts (+49 -61)

Build/lib/aho-corasick.test.ts (+12 -0)

@@ -0,0 +1,12 @@
+// eslint-disable-next-line import/no-unresolved -- bun
+import { describe, expect, it } from 'bun:test';
+import createKeywordFilter from './aho-corasick';
+
+describe('AhoCorasick', () => {
+  it('basic', () => {
+    const kwfilter = createKeywordFilter(['ap', 'an']);
+    expect(kwfilter('bananan')).toBeTrue();
+    expect(kwfilter('apple')).toBeTrue();
+    expect(kwfilter('melon')).toBeFalse();
+  });
+});

Build/lib/aho-corasick.ts (+49 -61)

@@ -1,95 +1,83 @@
 interface Node {
-  /** @default 0 */
-  depth?: number,
-  key: string,
   /** @default false */
-  word?: boolean,
-  children: Record<string, Node>,
-  fail?: Node,
-  count: number
+  wordEnd?: boolean,
+  children: Map<string, Node | undefined>,
+  fail?: Node
 }
 
-const createNode = (key: string, depth = 0): Node => ({
-  depth,
-  key,
-  word: false,
-  children: {},
-  fail: undefined,
-  count: 0
+const createNode = (): Node => ({
+  wordEnd: false,
+  children: new Map(),
+  fail: undefined
 });
 
 const createKeywordFilter = (keys: string[] | Set<string>) => {
-  const root = createNode('root');
-
-  const build = () => {
-    const queue: Node[] = [];
-    queue.push(root);
-
-    let idx = 0;
-    while (queue.length > idx) {
-      const beginNode = queue[idx];
-      const map = beginNode.children;
-      // eslint-disable-next-line guard-for-in -- plain object
-      for (const key in beginNode.children) {
-        const node = map[key];
-        let failNode = beginNode.fail;
-
-        while (failNode && !failNode.children[key]) {
-          failNode = failNode.fail;
-        }
-
-        if (node) {
-          node.fail = failNode?.children[key] || root;
-
-          queue.push(node);
-        }
-      }
-
-      idx++;
-    }
-  };
+  const root = createNode();
 
-  const put = (key: string, len: number) => {
+  const put = (key: string, len = key.length) => {
     let node = root;
     const lastIdx = len - 1;
-    node.count++;
+
     for (let idx = 0; idx < len; idx++) {
-      const val = key[idx];
-      const nextNode = node.children[val];
+      const char = key[idx];
 
-      if (nextNode) {
-        nextNode.count++;
-        node = nextNode;
+      if (node.children.has(char)) {
+        node = node.children.get(char)!;
       } else {
-        const newNode = createNode(val, idx + 1);
-        newNode.count = 1;
-        node.children[val] = newNode;
+        const newNode = createNode();
+        node.children.set(char, newNode);
         node = newNode;
       }
 
-      if (lastIdx === idx && node.depth) {
-        node.word = true;
+      if (lastIdx === idx && node !== root) {
+        node.wordEnd = true;
       }
     }
   };
 
-  keys.forEach(k => put(k, k.length));
+  keys.forEach(k => put(k));
+
+  // const build = () => {
+  const queue: Node[] = [];
+  queue.push(root);
+
+  let idx = 0;
+  while (queue.length > idx) {
+    const beginNode = queue[idx];
+    const children = beginNode.children;
+
+    children.forEach((node, char) => {
+      let failNode = beginNode.fail;
+
+      while (failNode && !failNode.children.has(char)) {
+        failNode = failNode.fail;
+      }
+
+      if (node) {
+        node.fail = failNode?.children.get(char) || root;
+
+        queue.push(node);
+      }
+    });
 
-  build();
+    idx++;
+  }
+  // };
+  // build();
 
   return (text: string) => {
     let node: Node | undefined = root;
 
     for (let i = 0, textLen = text.length; i < textLen; i++) {
       // const key = text.charAt(i);
-      const key = text[i];
+      const char = text[i];
 
-      while (node && !node.children[key]) {
+      while (node && !node.children.has(char)) {
         node = node.fail;
       }
-      node = node?.children[key] || root;
+      node = node?.children.get(char) || root;
 
-      if (node.word) {
+      if (node.wordEnd) {
         return true;
       }
     }