Browse Source

Perf: improve reject set dedupe performance

SukkaW 3 years ago
parent
commit
39f3dacf6e
2 changed files with 45 additions and 49 deletions
  1. 45 13
      Build/build-reject-domainset.js
  2. 0 36
      Build/worker/build-reject-domainset-worker.js

+ 45 - 13
Build/build-reject-domainset.js

@@ -148,24 +148,56 @@ const threads = require('os').cpus().length - 1;
   // Dedupe domainSets
   console.log(`Start deduping! (${previousSize})`);
 
+  const toBeRemoved = new Set();
+  for (const domain of domainSets) {
+    let isTobeRemoved = false;
+
+    for (const keyword of domainKeywordsSet) {
+      if (domain.includes(keyword) || keyword.includes(domain)) {
+        isTobeRemoved = true;
+        break;
+      }
+    }
+
+    if (!isTobeRemoved) {
+      for (const suffix of domainSuffixSet) {
+        if (domain.endsWith(suffix)) {
+          isTobeRemoved = true;
+          break;
+        }
+      }
+    }
+
+    if (!isTobeRemoved) {
+      for (const white of filterRuleWhitelistDomainSets) {
+        if (domain.includes(white) || white.includes(domain)) {
+          isTobeRemoved = true;
+          break;
+        }
+      }
+    }
+
+    if (isTobeRemoved) {
+      toBeRemoved.add(domain);
+    }
+  }
+
+  toBeRemoved.forEach((removed) => {
+    domainSets.delete(removed)
+  });
+
+  // Report how many domains the keyword, suffix, and whitelist checks removed
+  console.log(`Deduped ${previousSize - domainSets.size} from black keywords and suffixes!`);
+
+  previousSize = domainSets.size;
+  // Dedupe domainSets
+  console.log(`Start deduping! (${previousSize})`);
+
   const piscina = new Piscina({
     filename: pathResolve(__dirname, 'worker/build-reject-domainset-worker.js'),
     workerData: [...domainSets]
   });
 
-  (await Promise.all([
-    piscina.run(
-      { keywords: domainKeywordsSet, suffixes: domainSuffixSet },
-      { name: 'dedupeKeywords' }
-    ),
-    piscina.run(
-      { whiteList: filterRuleWhitelistDomainSets },
-      { name: 'whitelisted' }
-    )
-  ])).forEach(set => {
-    set.forEach(i => domainSets.delete(i));
-  });
-
   (await Promise.all(
     Array.from(domainSets)
       .reduce((result, element, index) => {

+ 0 - 36
Build/worker/build-reject-domainset-worker.js

@@ -43,39 +43,3 @@ exports.dedupe = ({ chunk }) => {
 
   return outputToBeRemoved;
 };
-
-exports.whitelisted = ({ whiteList }) => {
-  const outputToBeRemoved = new Set();
-
-  for (const domain of workerData) {
-    for (const white of whiteList) {
-      if (domain.includes(white) || white.includes(domain)) {
-        outputToBeRemoved.add(domain);
-        break;
-      }
-    }
-  }
-
-  return outputToBeRemoved;
-};
-
-exports.dedupeKeywords = ({ keywords, suffixes }) => {
-  const outputToBeRemoved = new Set();
-
-  for (const domain of workerData) {
-    for (const keyword of keywords) {
-      if (domain.includes(keyword) || keyword.includes(domain)) {
-        outputToBeRemoved.add(domain);
-        break;
-      }
-    }
-    for (const suffix of suffixes) {
-      if (domain.endsWith(suffix)) {
-        outputToBeRemoved.add(domain);
-        break;
-      }
-    }
-  }
-
-  return outputToBeRemoved;
-}