ソースを参照

Perf/Refactor: `processDomainLists` now returns `string[]`

SukkaW 1 年前
コミット
21a31e6c1f

+ 5 - 9
Build/build-reject-domainset.ts

@@ -19,7 +19,7 @@ import { SHARED_DESCRIPTION } from './lib/constants';
 import { getPhishingDomains } from './lib/get-phishing-domains';
 
 import { add as SetAdd, subtract as SetSubstract } from 'mnemonist/set';
-import { setAddFromArray } from './lib/set-add-from-array';
+import { setAddFromArray, setAddFromArrayCurried } from './lib/set-add-from-array';
 import { sort } from './lib/timsort';
 
 export const buildRejectDomainSet = task(import.meta.path, async (span) => {
@@ -38,7 +38,7 @@ export const buildRejectDomainSet = task(import.meta.path, async (span) => {
         // Parse from remote hosts & domain lists
         ...HOSTS.map(entry => processHosts(childSpan, entry[0], entry[1], entry[2], entry[3]).then(hosts => SetAdd(domainSets, hosts))),
 
-        ...DOMAIN_LISTS.map(entry => processDomainLists(childSpan, entry[0], entry[1], entry[2]).then(hosts => SetAdd(domainSets, hosts))),
+        ...DOMAIN_LISTS.map(entry => processDomainLists(childSpan, entry[0], entry[1], entry[2]).then(setAddFromArrayCurried(domainSets))),
 
         ...ADGUARD_FILTERS.map(input => (
           typeof input === 'string'
@@ -60,13 +60,9 @@ export const buildRejectDomainSet = task(import.meta.path, async (span) => {
           setAddFromArray(filterRuleWhitelistDomainSets, white);
           setAddFromArray(filterRuleWhitelistDomainSets, black);
         }))),
-        getPhishingDomains(childSpan).then(([purePhishingDomains, fullPhishingDomainSet]) => {
-          SetAdd(domainSets, fullPhishingDomainSet);
-          setAddFromArray(domainSets, purePhishingDomains);
-        }),
-        childSpan.traceChildAsync('process reject_sukka.conf', async () => {
-          setAddFromArray(domainSets, await readFileIntoProcessedArray(path.resolve(import.meta.dir, '../Source/domainset/reject_sukka.conf')));
-        })
+        getPhishingDomains(childSpan).then(setAddFromArrayCurried(domainSets)),
+        childSpan.traceChildAsync('process reject_sukka.conf', () => readFileIntoProcessedArray(path.resolve(import.meta.dir, '../Source/domainset/reject_sukka.conf'))
+          .then(setAddFromArrayCurried(domainSets)))
       ]);
       // eslint-disable-next-line sukka/no-single-return -- not single return
       return shouldStop;

+ 6 - 9
Build/lib/get-phishing-domains.ts

@@ -5,6 +5,7 @@ import { TTL } from './cache-filesystem';
 
 import { add as SetAdd } from 'mnemonist/set';
 import type { Span } from '../trace';
+import { appendArrayInPlace } from './append-array-in-place';
 
 const BLACK_TLD = new Set([
   'accountant',
@@ -92,13 +93,13 @@ const BLACK_TLD = new Set([
 export const getPhishingDomains = (parentSpan: Span) => parentSpan.traceChild('get phishing domains').traceAsyncFn(async (span) => {
   const gorhill = await getGorhillPublicSuffixPromise();
 
-  const domainSet = await span.traceChildAsync('download/parse/merge phishing domains', async (curSpan) => {
+  const domainArr = await span.traceChildAsync('download/parse/merge phishing domains', async (curSpan) => {
     const [domainSet, domainSet2] = await Promise.all([
       processDomainLists(curSpan, 'https://curbengh.github.io/phishing-filter/phishing-filter-domains.txt', true, TTL.THREE_HOURS()),
       processDomainLists(curSpan, 'https://phishing.army/download/phishing_army_blocklist.txt', true, TTL.THREE_HOURS())
     ]);
 
-    SetAdd(domainSet, domainSet2);
+    appendArrayInPlace(domainSet, domainSet2);
 
     return domainSet;
   });
@@ -106,8 +107,6 @@ export const getPhishingDomains = (parentSpan: Span) => parentSpan.traceChild('g
   const domainCountMap: Record<string, number> = {};
 
   span.traceChildSync('process phishing domain set', () => {
-    const domainArr = Array.from(domainSet);
-
     for (let i = 0, len = domainArr.length; i < len; i++) {
       const line = domainArr[i];
 
@@ -126,17 +125,15 @@ export const getPhishingDomains = (parentSpan: Span) => parentSpan.traceChild('g
     }
   });
 
-  const results = span.traceChildSync('get final phishing results', () => {
-    const res: string[] = [];
+  span.traceChildSync('get final phishing results', () => {
     for (const domain in domainCountMap) {
       if (domainCountMap[domain] >= 8) {
-        res.push(`.${domain}`);
+        domainArr.push(`.${domain}`);
       }
     }
-    return res;
   });
 
-  return [results, domainSet] as const;
+  return domainArr;
 });
 
 export function calcDomainAbuseScore(line: string) {

+ 5 - 5
Build/lib/parse-filter.ts

@@ -8,7 +8,7 @@ import type { PublicSuffixList } from '@gorhill/publicsuffixlist';
 import picocolors from 'picocolors';
 import { normalizeDomain } from './normalize-domain';
 import { fetchAssets } from './fetch-assets';
-import { deserializeSet, fsFetchCache, serializeSet } from './cache-filesystem';
+import { deserializeArray, deserializeSet, fsFetchCache, serializeArray, serializeSet } from './cache-filesystem';
 import type { Span } from '../trace';
 import createKeywordFilter from './aho-corasick';
 
@@ -20,7 +20,7 @@ export function processDomainLists(span: Span, domainListsUrl: string, includeAl
   return span.traceChild(`process domainlist: ${domainListsUrl}`).traceAsyncFn(() => fsFetchCache.apply(
     domainListsUrl,
     async () => {
-      const domainSets = new Set<string>();
+      const domainSets: string[] = [];
 
       for await (const line of await fetchRemoteTextByLine(domainListsUrl)) {
         let domainToAdd = processLine(line);
@@ -33,7 +33,7 @@ export function processDomainLists(span: Span, domainListsUrl: string, includeAl
           foundDebugDomain = true;
         }
 
-        domainSets.add(includeAllSubDomain ? `.${domainToAdd}` : domainToAdd);
+        domainSets.push(includeAllSubDomain ? `.${domainToAdd}` : domainToAdd);
       }
 
       return domainSets;
@@ -41,8 +41,8 @@ export function processDomainLists(span: Span, domainListsUrl: string, includeAl
     {
       ttl,
       temporaryBypass,
-      serializer: serializeSet,
-      deserializer: deserializeSet
+      serializer: serializeArray,
+      deserializer: deserializeArray
     }
   ));
 }

+ 26 - 0
Build/lib/set-add-from-array.bench.ts

@@ -0,0 +1,26 @@
+import { fetchRemoteTextByLine } from './fetch-text-by-line';
+import { processLineFromReadline } from './process-line';
+
+import { bench, group, run } from 'mitata';
+
+(async () => {
+  const data = await processLineFromReadline(await fetchRemoteTextByLine('https://osint.digitalside.it/Threat-Intel/lists/latestdomains.txt'));
+
+  group('setAddFromArray', () => {
+    bench('run', () => {
+      const set = new Set(['1', '2', '1', '3', 'skk.moe']);
+      for (let i = 0, len = data.length; i < len; i++) {
+        set.add(data[i]);
+      }
+    });
+  });
+  group('setAddFromArray', () => {
+    bench('run', () => {
+      const set = new Set(['1', '2', '1', '3', 'skk.moe']);
+      // eslint-disable-next-line @typescript-eslint/unbound-method -- thisArg is passed
+      data.forEach(set.add, set);
+    });
+  });
+
+  run();
+})();

+ 8 - 3
Build/lib/set-add-from-array.ts

@@ -2,7 +2,12 @@
  * In-place adding of elements from an array to a set.
  */
 export function setAddFromArray<T>(set: Set<T>, arr: T[]): void {
-  for (let i = 0, len = arr.length; i < len; i++) {
-    set.add(arr[i]);
-  }
+  // for (let i = 0, len = arr.length; i < len; i++) {
+  //   set.add(arr[i]);
+  // }
+  // eslint-disable-next-line @typescript-eslint/unbound-method -- thisArg is passed
+  arr.forEach(set.add, set);
 }
+
+// eslint-disable-next-line @typescript-eslint/unbound-method -- thisArg is passed
+export const setAddFromArrayCurried = <T>(set: Set<T>) => (arr: T[]) => arr.forEach(set.add, set);