ソースを参照

Fix/Perf: more efficient and correct whitelisting

SukkaW 1 年前
コミット
2f329a4144

+ 5 - 5
Build/build-reject-domainset.ts

@@ -94,9 +94,6 @@ export const buildRejectDomainSet = task(import.meta.path, async (span) => {
       }
       }
     });
     });
 
 
-    // Remove as many domains as possible from domainSets before creating trie
-    SetSubstract(domainSets, filterRuleWhitelistDomainSets);
-
     // Perform kwfilter to remove as many domains as possible from domainSets before creating trie
     // Perform kwfilter to remove as many domains as possible from domainSets before creating trie
     childSpan.traceChildSync('dedupe from black keywords', () => {
     childSpan.traceChildSync('dedupe from black keywords', () => {
       const kwfilter = createKeywordFilter(domainKeywordsSet);
       const kwfilter = createKeywordFilter(domainKeywordsSet);
@@ -110,11 +107,14 @@ export const buildRejectDomainSet = task(import.meta.path, async (span) => {
     });
     });
   });
   });
 
 
-  const trie = createTrie(domainSets, true, true);
-  span.traceChildSync('dedupe from white suffixes', () => {
+  const trie = span.traceChildSync('dedupe from white suffixes', () => {
+    const trie = createTrie(domainSets, true, true);
+
     filterRuleWhitelistDomainSets.forEach(suffix => {
     filterRuleWhitelistDomainSets.forEach(suffix => {
       trie.whitelist(suffix);
       trie.whitelist(suffix);
     });
     });
+
+    return trie;
   });
   });
 
 
   // Dedupe domainSets
   // Dedupe domainSets

+ 1 - 1
Build/lib/reject-data-source.ts

@@ -251,7 +251,7 @@ export const PREDEFINED_WHITELIST = [
   'business.site', // Drag'n'Drop site building platform
   'business.site', // Drag'n'Drop site building platform
   'page.link', // Firebase URL Shortener
   'page.link', // Firebase URL Shortener
   'notion.site'
   'notion.site'
-];
+].map(suffix => `.${suffix}`);
 
 
 export const PREDEFINED_ENFORCED_WHITELIST = [
 export const PREDEFINED_ENFORCED_WHITELIST = [
   'r2.dev',
   'r2.dev',

+ 20 - 1
Build/lib/trie.test.ts

@@ -208,6 +208,26 @@ describe('smol tree', () => {
     ]);
     ]);
   });
   });
 
 
+  it('should whitelist trie correctly', () => {
+    const trie = createTrie([
+      '.t.co',
+      't.co',
+      'example.t.co',
+      '.skk.moe'
+    ], true, true);
+
+    expect(trie.dump()).toStrictEqual([
+      '.skk.moe',
+      '.t.co'
+    ]);
+
+    trie.whitelist('.t.co');
+    expect(trie.dump()).toStrictEqual(['.skk.moe']);
+
+    trie.whitelist('skk.moe');
+    expect(trie.dump()).toStrictEqual([]);
+  });
+
   it('should efficiently whitelist domains', () => {
   it('should efficiently whitelist domains', () => {
     const trie = createTrie([
     const trie = createTrie([
       'skk.moe',
       'skk.moe',
@@ -231,7 +251,6 @@ describe('smol tree', () => {
     ]);
     ]);
 
 
     trie.whitelist('anotherskk.moe');
     trie.whitelist('anotherskk.moe');
-
     expect(trie.dump()).toStrictEqual([
     expect(trie.dump()).toStrictEqual([
       'blog.anotherskk.moe'
       'blog.anotherskk.moe'
     ]);
     ]);

+ 25 - 27
Build/lib/trie.ts

@@ -370,27 +370,11 @@ export const createTrie = (from?: string[] | Set<string> | null, hostnameMode =
       parent = node;
       parent = node;
 
 
       node = node.get(token);
       node = node.get(token);
-      if (!node) {
-        return;
-      }
-
-      // Keeping track of a potential branch to prune
-      // If the node is to be pruned, but they are more than one token child in it, we can't prune it
-      // If there is only one token child, or no child at all, we can prune it safely
-
-      const onlyChild = node.size === 1 && node.has(token);
-
-      if (onlyChild) {
-        toPrune = parent;
-        tokenToPrune = token;
-      } else if (toPrune !== null) { // not only child, retain the branch
-        toPrune = null;
-        tokenToPrune = null;
-      }
+      if (!node) return;
 
 
       // During the whitelist of `[start]blog.skk.moe` and find out that there is a `[start].skk.moe` in the trie
       // During the whitelist of `[start]blog.skk.moe` and find out that there is a `[start].skk.moe` in the trie
       // Dedupe the covered subdomain by skipping
       // Dedupe the covered subdomain by skipping
-      if (node.get('.')?.[SENTINEL]) {
+      if (i > 1 && node.get('.')?.[SENTINEL] === true) {
         return;
         return;
       }
       }
 
 
@@ -399,21 +383,35 @@ export const createTrie = (from?: string[] | Set<string> | null, hostnameMode =
         // If there is a `[start]sub.example.com` here, remove it
         // If there is a `[start]sub.example.com` here, remove it
         node[SENTINEL] = false;
         node[SENTINEL] = false;
 
 
-        // Removing the rest of the child nodes by creating a new node and disconnecting the old one
-        const newNode = createNode(node);
-        node.set('.', newNode);
-        node = newNode;
-        break;
+        // Removing all the child nodes by disconnecting "."
+        node.delete('.');
+      } else if (i === 0) {
+        // Trying to whitelist `example.com` when there is already a `.example.com` in the trie
+        const dotNode = node.get('.');
+        if (dotNode?.[SENTINEL] === true) {
+          dotNode[SENTINEL] = false;
+        }
       }
       }
-      if (i === 0) {
-        // Trying to add `example.com` when there is already a `.example.com` in the trie
-        if (node.get('.')?.[SENTINEL] === true) {
-          return;
+
+      // Keeping track of a potential branch to prune
+      // If the node is to be pruned, but there is more than one token child in it, we can't prune it
+      // If there is only one token child, or no child at all, we can prune it safely
+
+      if (toPrune != null) { // the first branch that could potentially be pruned
+        if (node.size > 1 || node.has('.')) {
+          // Not an only child, so retain the branch.
+          // We also need to abort pruning the parent, so we reset it to null
+          toPrune = null;
+          tokenToPrune = null;
         }
         }
+      } else if (node.size < 2 && !node.has('.')) {
+        toPrune = parent;
+        tokenToPrune = token;
       }
       }
     }
     }
 
 
     if (!node[SENTINEL]) return false;
     if (!node[SENTINEL]) return false;
+
     if (tokenToPrune && toPrune) {
     if (tokenToPrune && toPrune) {
       toPrune.delete(tokenToPrune);
       toPrune.delete(tokenToPrune);
     } else {
     } else {