Browse Source

Chore: reject domainset update

SukkaW 3 years ago
parent
commit
c91d22b2d3

+ 5 - 2
Build/build-reject-domainset.js

@@ -1,7 +1,7 @@
 const { promises: fsPromises } = require('fs');
 const { resolve: pathResolve } = require('path');
 const Piscina = require('piscina');
-const { processHosts, processFilterRules } = require('./lib/parse-filter');
+const { processHosts, processFilterRules, preprocessFullDomainSetBeforeUsedAsWorkerData } = require('./lib/parse-filter');
 const cpuCount = require('os').cpus().length;
 const { isCI } = require('ci-info');
 const threads = isCI ? cpuCount : cpuCount / 2;
@@ -68,6 +68,7 @@ const threads = isCI ? cpuCount : cpuCount / 2;
     'ip6-allrouters',
     'ip6-allhosts',
     'mcastprefix',
+    'skk.moe',
     'analytics.google.com',
     'msa.cdn.mediaset.net', // Added manually using DOMAIN-KEYWORDS
     'cloud.answerhub.com',
@@ -145,6 +146,8 @@ const threads = isCI ? cpuCount : cpuCount / 2;
     'https://raw.githubusercontent.com/DandelionSprout/adfilt/master/GameConsoleAdblockList.txt',
     // PiHoleBlocklist
     'https://raw.githubusercontent.com/Perflyst/PiHoleBlocklist/master/SmartTV-AGH.txt',
+    // Spam404
+    'https://raw.githubusercontent.com/Spam404/lists/master/adblock-list.txt'
   ].map(input => {
     if (typeof input === 'string') {
       return processFilterRules(input);
@@ -233,7 +236,7 @@ const threads = isCI ? cpuCount : cpuCount / 2;
 
   const piscina = new Piscina({
     filename: pathResolve(__dirname, 'worker/build-reject-domainset-worker.js'),
-    workerData: [...domainSets],
+    workerData: preprocessFullDomainSetBeforeUsedAsWorkerData([...domainSets]),
     idleTimeout: 50,
     minThreads: threads,
     maxThreads: threads

+ 23 - 0
Build/lib/parse-filter.js

@@ -125,6 +125,7 @@ async function processFilterRules (filterRulesUrl, fallbackUrls) {
       || line.includes('!')
       || line.includes('*')
       || line.includes('/')
+      || line.includes('[')
       || line.includes('$') && !lineStartsWithDoubleVerticalBar
       || line === ''
       || isIP(line) !== 0
@@ -214,6 +215,28 @@ async function processFilterRules (filterRulesUrl, fallbackUrls) {
   };
 }
 
+function preprocessFullDomainSetBeforeUsedAsWorkerData (data) { // Filters the full domain list before it is passed to the worker pool as workerData
+  return data.filter(domain => ( // filter() builds a new array; `data` itself is left untouched
+    domain.charCodeAt(0) === 46 // 46 is the char code of ".": keep only entries that start with a dot
+    && !canExcludeFromDedupe(domain) // drop entries that canExcludeFromDedupe() reports as not needing dedupe
+  ));
+}
+
+// duckdns.org domains will not overlap and don't need dedupe.
+function canExcludeFromDedupe (domain) { // returns true only when all three checks below hold, false otherwise
+  if (
+    // starts with a dot (46 is the char code of ".")
+    domain.charCodeAt(0) === 46
+    && domain.length === 23 // leading dot + 10 characters + ".duckdns.org" (12 chars); NOTE(review): presumably the fixed length of the duckdns.org entries in the upstream lists - confirm
+    && domain.endsWith('.duckdns.org')
+  ) {
+    return true;
+  }
+  return false;
+}
+
 module.exports.processDomainLists = processDomainLists;
 module.exports.processHosts = processHosts;
 module.exports.processFilterRules = processFilterRules;
+module.exports.preprocessFullDomainSetBeforeUsedAsWorkerData = preprocessFullDomainSetBeforeUsedAsWorkerData;
+module.exports.canExcludeFromDedupe = canExcludeFromDedupe;

+ 4 - 14
Build/worker/build-reject-domainset-worker.js

@@ -1,12 +1,10 @@
 const Piscina = require('piscina');
 // Pre-check whether each fullset domain starts with a ".".
 // This avoids calling charCodeAt repeatedly.
+const { canExcludeFromDedupe } = require('../lib/parse-filter')
 
-// workerData is an array of string. Sort it by length, short first:
-const fullsetDomainStartsWithADot = Piscina.workerData.filter(domain => (
-  domain.charCodeAt(0) === 46
-  && !canExcludeFromDedupe(domain)
-));
+// workerData is an array of strings, sorted by length, short first
+const fullsetDomainStartsWithADot = Piscina.workerData
 const totalLen = fullsetDomainStartsWithADot.length;
 
 module.exports.dedupe = ({ chunk }) => {
@@ -60,13 +58,5 @@ module.exports.dedupe = ({ chunk }) => {
     }
   }
 
-  return outputToBeRemoved;
+  return Piscina.move(outputToBeRemoved);
 };
-
-// duckdns.org domain will not overlap and doesn't need dedupe
-function canExcludeFromDedupe (domain) {
-  if (domain.length === 23 && domain.endsWith('.duckdns.org')) {
-    return true;
-  }
-  return false;
-}