浏览代码

Perf: remove cached tld parse

SukkaW 1 年之前
父节点
当前提交
35aa11f361
共有 3 个文件被更改，包括 26 次插入和 46 次删除
  1. + 0 - 9
      Build/lib/cached-tld-parse.ts
  2. + 1 - 3
      Build/lib/get-phishing-domains.ts
  3. + 25 - 34
      Build/lib/trie.ts

+ 0 - 9
Build/lib/cached-tld-parse.ts

@@ -1,9 +0,0 @@
-import { createCache } from './cache-apply';
-import type { PublicSuffixList } from '@gorhill/publicsuffixlist';
-
-let gorhillGetDomainCache: ReturnType<typeof createCache> | null = null;
-export const createCachedGorhillGetDomain = (gorhill: PublicSuffixList) => {
-  gorhillGetDomainCache ??= createCache('cached-gorhill-get-domain', true);
-  return (domain: string) => gorhillGetDomainCache! // we do know gothillGetDomainCache exists here
-    .sync(domain, () => gorhill.getDomain(domain[0] === '.' ? domain.slice(1) : domain));
-};

+ 1 - 3
Build/lib/get-phishing-domains.ts

@@ -2,7 +2,6 @@ import { getGorhillPublicSuffixPromise } from './get-gorhill-publicsuffix';
 import { processDomainLists } from './parse-filter';
 import * as tldts from 'tldts';
 import { createTrie } from './trie';
-import { createCachedGorhillGetDomain } from './cached-tld-parse';
 import { processLine } from './process-line';
 import { TTL } from './cache-filesystem';
 import { isCI } from 'ci-info';
@@ -130,7 +129,6 @@ export const getPhishingDomains = (parentSpan: Span) => parentSpan.traceChild('g
   });
 
   const domainCountMap: Record<string, number> = {};
-  const getDomain = createCachedGorhillGetDomain(gorhill);
 
   span.traceChildSync('process phishing domain set', () => {
     const domainArr = Array.from(domainSet);
@@ -139,7 +137,7 @@ export const getPhishingDomains = (parentSpan: Span) => parentSpan.traceChild('g
       const line = processLine(domainArr[i]);
       if (!line) continue;
 
-      const apexDomain = getDomain(line);
+      const apexDomain = gorhill.getDomain(line);
       if (!apexDomain) continue;
 
       domainCountMap[apexDomain] ||= 0;

+ 25 - 34
Build/lib/trie.ts

@@ -79,11 +79,10 @@ export const createTrie = (from?: string[] | Set<string> | null, hostnameMode =
       if (node.has(token)) {
         node = node.get(token)!;
 
-        if (smolTree) {
-          if (node.get('.')?.[SENTINEL] === true) {
-            return;
-          }
-          // return;
+        // During the adding of `[start]blog.skk.moe` and find out that there is a `[start].skk.moe` in the trie
+        // Dedupe the covered subdomain by skipping
+        if (smolTree && (node.get('.')?.[SENTINEL])) {
+          return;
         }
       } else {
         const newNode = createNode(node);
@@ -92,9 +91,12 @@ export const createTrie = (from?: string[] | Set<string> | null, hostnameMode =
       }
 
       if (smolTree) {
+        // Trying to add `[start].sub.example.com` where there is already a `[start]blog.sub.example.com` in the trie
         if (i === 1 && tokens[0] === '.') {
+          // If there is a `[start]sub.example.com` here, remove it
           node[SENTINEL] = false;
-          // Trying to add `.sub.example.com` where there is already a `blog.sub.example.com` in the trie
+
+          // Removing the rest of the child nodes by creating a new node and disconnecting the old one
           const newNode = createNode(node);
           node.set('.', newNode);
           node = newNode;
@@ -225,13 +227,11 @@ export const createTrie = (from?: string[] | Set<string> | null, hostnameMode =
       node = nodeStack.pop()!;
 
       if (node[SENTINEL]) {
-        if (suffix !== inputTokens) {
-          // found match, delete it from set
-          if (hostnameMode) {
-            set.delete((suffix as string[]).join(''));
-          } else {
-            set.delete(suffix as string);
-          }
+        // found match, delete it from set
+        if (hostnameMode) {
+          set.delete((suffix as string[]).join(''));
+        } else if (suffix !== inputTokens) {
+          set.delete(suffix as string);
         }
       }
 
@@ -317,37 +317,22 @@ export const createTrie = (from?: string[] | Set<string> | null, hostnameMode =
     return node[SENTINEL];
   };
 
-  if (Array.isArray(from)) {
-    for (let i = 0, l = from.length; i < l; i++) {
-      add(from[i]);
-    }
-  } else if (from) {
-    from.forEach(add);
-  }
-
   const dump = () => {
     const nodeStack: TrieNode[] = [];
     const suffixStack: Array<string | string[]> = [];
-    // Resolving initial string
-    const suffix = hostnameMode ? [] : '';
 
     nodeStack.push(root);
-    suffixStack.push(suffix);
+    // Resolving initial string (begin the start of the stack)
+    suffixStack.push(hostnameMode ? [] : '');
 
     const results: string[] = [];
 
     let node: TrieNode;
 
     do {
-      let hasValue = false;
-
       node = nodeStack.pop()!;
       const suffix = suffixStack.pop()!;
 
-      if (node[SENTINEL]) {
-        hasValue = true;
-      }
-
       node.forEach((childNode, k) => {
         nodeStack.push(childNode);
 
@@ -358,16 +343,22 @@ export const createTrie = (from?: string[] | Set<string> | null, hostnameMode =
         }
       });
 
-      if (hasValue) {
-        results.push(
-          hostnameMode ? (suffix as string[]).join('') : (suffix as string)
-        );
+      if (node[SENTINEL]) {
+        results.push(hostnameMode ? (suffix as string[]).join('') : (suffix as string));
       }
     } while (nodeStack.length);
 
     return results;
   };
 
+  if (Array.isArray(from)) {
+    for (let i = 0, l = from.length; i < l; i++) {
+      add(from[i]);
+    }
+  } else if (from) {
+    from.forEach(add);
+  }
+
   return {
     add,
     contains,