// aho-corasick.bench.ts (2.1 KB)
  1. import { fetchRemoteTextByLine } from './fetch-text-by-line';
  2. import createKeywordFilter from './aho-corasick';
  3. // eslint-disable import-x/no-unresolved -- benchmark
  4. import ModernAhoCorasick from 'modern-ahocorasick';
  5. import { AhoCorasick as MonyoneAhoCorasick } from '@monyone/aho-corasick';
  6. // @ts-expect-error -- no types
  7. import FastScanner from 'fastscan';
  8. import { AhoCorasick as RustAhoCorasick } from '@blackglory/aho-corasick';
  9. // eslint-enable import-x/no-unresolved
  /**
   * Calls `testFn` once for every entry of `data`, in index order.
   *
   * The return value of `testFn` is discarded; this helper exists only to
   * drive a predicate over the whole data set.
   *
   * @param data - Lines to feed to the predicate.
   * @param testFn - Predicate invoked with each line.
   */
  function runKeywordFilter(data: string[], testFn: (line: string) => boolean) {
    // Length is read once up front, so entries appended during the run are not visited.
    const total = data.length;
    let index = 0;
    while (index < total) {
      testFn(data[index]);
      index += 1;
    }
  }
  /**
   * Builds the list of keyword matchers to compare, as `[label, predicate]` pairs.
   *
   * Every implementation is constructed from its own `slice()` copy of
   * `keywordsSet`, so none of them is handed the caller's array itself.
   *
   * @param keywordsSet - Keywords passed to each matcher's constructor/factory.
   * @returns A readonly tuple of `[name, fn]` entries, where `fn` takes a single
   * line and yields that implementation's match result.
   */
  export function getFns(keywordsSet: string[] | readonly string[]) {
    const modernMatcher = new ModernAhoCorasick(keywordsSet.slice());
    const monyoneMatcher = new MonyoneAhoCorasick(keywordsSet.slice());
    const fastScanner = new FastScanner(keywordsSet.slice());
    const rustMatcher = new RustAhoCorasick(keywordsSet.slice(), { caseSensitive: true });
    // Built last, matching the order in which the original evaluated it.
    const localFilter = createKeywordFilter(keywordsSet.slice());

    // `search` returns a collection of hits for these two; non-empty means a match.
    const hasModernMatch = (line: string) => modernMatcher.search(line).length > 0;
    const hasFastscanMatch = (line: string) => fastScanner.search(line).length > 0;
    const hasMonyoneMatch = (line: string) => monyoneMatcher.hasKeywordInText(line);
    const hasRustMatch = (line: string) => rustMatcher.isMatch(line);

    return [
      ['createKeywordFilter', localFilter],
      ['modern-ahocorasick', hasModernMatch],
      ['@monyone/aho-corasick', hasMonyoneMatch],
      ['fastscan', hasFastscanMatch],
      ['@blackglory/aho-corasick', hasRustMatch]
    ] as const;
  }
  // Benchmark entry point: only runs when this file is executed directly,
  // not when it is imported for `getFns`.
  if (require.main === module) {
    const main = async () => {
      // mitata is loaded lazily so importing this module does not pull it in.
      const { bench, group, run } = await import('mitata');

      const source = await fetchRemoteTextByLine('https://easylist.to/easylist/easylist.txt', true);
      const lines = await Array.fromAsync(source);
      console.log({ dataLen: lines.length });

      const keywordsSet = [
        '!',
        '?',
        '*',
        '[',
        '(',
        ']',
        ')',
        ',',
        '#',
        '%',
        '&',
        '=',
        '~',
        // special modifier
        '$popup',
        '$removeparam',
        '$popunder',
        '$cname',
        '$frame',
        // some bad syntax
        '^popup'
      ];

      const candidates = getFns(keywordsSet);

      // Register one benchmark per implementation inside a single group.
      group(() => {
        for (const [name, fn] of candidates) {
          bench(name, () => runKeywordFilter(lines, fn));
        }
      });

      run();
    };

    main();
  }