Browse Source

Update Aho Corasick benchmark

SukkaW 1 year ago
parent
commit
200da7a2be
2 changed files with 100 additions and 11 deletions
  1. 73 0
      Build/lib/aho-corasick.bench.ts
  2. 27 11
      Build/lib/aho-corasick.test.ts

+ 73 - 0
Build/lib/aho-corasick.bench.ts

@@ -0,0 +1,73 @@
+import { fetchRemoteTextByLine } from './fetch-text-by-line';
+import { processLineFromReadline } from './process-line';
+
+import createKeywordFilter from './aho-corasick';
+
+import ModernAhoCorasick from 'modern-ahocorasick';
+import { AhoCorasick as MonyoneAhoCorasick } from '@monyone/aho-corasick';
+// @ts-expect-error -- no types
+import FastScanner from 'fastscan';
+import { AhoCorasick as RustAhoCorasick } from '@blackglory/aho-corasick';
+
+function runKeywordFilter(data: string[], testFn: (line: string) => boolean) { // benchmark workload: calls testFn once for every entry of data, in order
+  for (let i = 0, len = data.length; i < len; i++) { // indexed loop; the array length is read once before iterating
+    testFn(data[i]); // the boolean result is discarded; only the call itself is performed
+  }
+}
+
+export function getFns(keywordsSet: string[] | readonly string[]) { // builds one [name, predicate] pair per matcher implementation; NOTE(review): `readonly string[]` alone already accepts mutable arrays
+  const tmp1 = new ModernAhoCorasick(keywordsSet.slice()); // each implementation receives its own copy of the keywords via slice()
+  const tmp2 = new MonyoneAhoCorasick(keywordsSet.slice());
+  const scanner = new FastScanner(keywordsSet.slice()); // 'fastscan' is imported without types (see the @ts-expect-error on its import)
+  const tmp3 = new RustAhoCorasick(keywordsSet.slice(), { caseSensitive: true });
+
+  return [ // the first element is the label used by both the benchmark and the tests; the second is called with a single line
+    ['createKeywordFilter', createKeywordFilter(keywordsSet.slice())], // the project's own implementation, used directly as the predicate
+    ['modern-ahocorasick', (line: string) => tmp1.search(line).length > 0], // search() result reduced to "has at least one entry"
+    ['@monyone/aho-corasick', (line: string) => tmp2.hasKeywordInText(line)],
+    ['fastscan', (line: string) => scanner.search(line).length > 0], // same reduction as above
+    ['@blackglory/aho-corasick', (line: string) => tmp3.isMatch(line)]
+  ] as const; // `as const` makes the returned list a readonly tuple of [name, fn] pairs
+}
+
+if (require.main === module) { // benchmark entry point: this branch runs only when the file is executed directly, not when it is imported
+  (async () => {
+    const { bench, group, run } = await import('mitata'); // mitata is imported dynamically, only on this direct-run path
+
+    const data = await processLineFromReadline(await fetchRemoteTextByLine('https://easylist.to/easylist/easylist.txt')); // corpus: EasyList fetched over the network
+    console.log({ dataLen: data.length });
+    const keywordsSet = [ // keywords every implementation is asked to look for in each line
+      '!',
+      '?',
+      '*',
+      '[',
+      '(',
+      ']',
+      ')',
+      ',',
+      '#',
+      '%',
+      '&',
+      '=',
+      '~',
+      // special modifier
+      '$popup',
+      '$removeparam',
+      '$popunder',
+      '$cname',
+      '$frame',
+      // some bad syntax
+      '^popup'
+    ];
+
+    const fns = getFns(keywordsSet);
+
+    group(() => { // one group holding one bench entry per implementation returned by getFns
+      fns.forEach(([name, fn]) => {
+        bench(name, () => runKeywordFilter(data, fn)); // each sample runs the predicate over the whole corpus
+      });
+    });
+
+    await run(); // awaited so a failure while benchmarking rejects this function's promise and reaches the handler below
+  })().catch((err: unknown) => { console.error(err); process.exitCode = 1; }); // report download/benchmark failures explicitly instead of leaving the promise floating
+}

+ 27 - 11
Build/lib/aho-corasick.test.ts

@@ -1,17 +1,33 @@
 import { describe, it } from 'mocha';
 import { expect } from 'expect';
-import createKeywordFilter from './aho-corasick';
+import { getFns } from './aho-corasick.bench';
 
 describe('AhoCorasick', () => {
-  it('basic', () => {
-    let kwfilter = createKeywordFilter(['ap', 'an']);
-    expect(kwfilter('bananan')).toBe(true);
-    expect(kwfilter('apple')).toBe(true);
-    expect(kwfilter('melon')).toBe(false);
+  for (const test of ([ // table-driven cases; each case is [keywords, input lines, expected result per input line]
+    [
+      ['ap', 'an'],
+      ['bananan', 'apple', 'melon'],
+      [true, true, false]
+    ],
+    [
+      ['cdn', 'sukka'],
+      ['bananan', 'apple', 'melon'],
+      [false, false, false]
+    ]
+  ] as const)) {
+    const kwtests = getFns(test[0]); // [name, predicate] pairs from the bench module, built for this case's keywords
+    const fixtures = test[1];
+    const expected = test[2];
 
-    kwfilter = createKeywordFilter(['cdn', 'sukka']);
-    expect(kwfilter('bananan')).toBe(false);
-    expect(kwfilter('apple')).toBe(false);
-    expect(kwfilter('melon')).toBe(false);
-  });
+    for (const kwtest of kwtests) { // one mocha test per implementation, per case
+      const fnName = kwtest[0];
+      const fn = kwtest[1];
+
+      it(fnName, () => { // NOTE(review): the title is only the implementation name, so every title is registered once per case and repeats in the report
+        for (let i = 0, len = fixtures.length; i < len; i++) {
+          expect(fn(fixtures[i])).toBe(expected[i]); // expected[i] is the result for fixtures[i]
+        }
+      });
+    }
+  }
 });