Browse Source

Perf: attempts to make phishing hosts processing faster

SukkaW 1 year ago
parent
commit
eac8256e2e

+ 5 - 1
Build/constants/reject-data-source.ts

@@ -286,7 +286,11 @@ export const ADGUARD_FILTERS_EXTRA: AdGuardFilterSource[] = [
     ]
     ]
   ],
   ],
   // the NoCoin list in AdGuard format is better maintained than its hosts-format counterpart
   // the NoCoin list in AdGuard format is better maintained than its hosts-format counterpart
-  ['https://raw.githubusercontent.com/hoshsadiq/adblock-nocoin-list/master/nocoin.txt', [], true],
+  [
+    'https://cdn.jsdelivr.net/gh/hoshsadiq/adblock-nocoin-list@master/nocoin.txt',
+    ['https://raw.githubusercontent.com/hoshsadiq/adblock-nocoin-list/master/nocoin.txt'],
+    true
+  ],
   // AdGuard Annoyances filter
   // AdGuard Annoyances filter
   [
   [
     'https://filters.adtidy.org/extension/ublock/filters/14_optimized.txt',
     'https://filters.adtidy.org/extension/ublock/filters/14_optimized.txt',

+ 20 - 16
Build/lib/get-phishing-domains.ts

@@ -37,7 +37,7 @@ const pool = new Worktank({
       const { BLACK_TLD, WHITELIST_MAIN_DOMAINS, leathalKeywords, lowKeywords, sensitiveKeywords } = __require('../constants/phishing-score-source') as typeof import('../constants/phishing-score-source');
       const { BLACK_TLD, WHITELIST_MAIN_DOMAINS, leathalKeywords, lowKeywords, sensitiveKeywords } = __require('../constants/phishing-score-source') as typeof import('../constants/phishing-score-source');
 
 
       const domainCountMap = new Map<string, number>();
       const domainCountMap = new Map<string, number>();
-      const domainScoreMap: Record<string, number> = {};
+      const domainScoreMap: Record<string, number> = Object.create(null);
 
 
       let line = '';
       let line = '';
       let tld: string | null = '';
       let tld: string | null = '';
@@ -72,6 +72,9 @@ const pool = new Worktank({
           console.log(picocolors.yellow('[phishing domains] E0002'), 'missing domain', { line, apexDomain });
           console.log(picocolors.yellow('[phishing domains] E0002'), 'missing domain', { line, apexDomain });
           continue;
           continue;
         }
         }
+        if (WHITELIST_MAIN_DOMAINS.has(apexDomain)) {
+          continue;
+        }
 
 
         domainCountMap.set(
         domainCountMap.set(
           apexDomain,
           apexDomain,
@@ -80,37 +83,38 @@ const pool = new Worktank({
             : 1
             : 1
         );
         );
 
 
+        let score = apexDomain in domainScoreMap ? domainScoreMap[apexDomain] : 0;
+
         if (!(apexDomain in domainScoreMap)) {
         if (!(apexDomain in domainScoreMap)) {
-          domainScoreMap[apexDomain] = 0;
           if (BLACK_TLD.has(tld)) {
           if (BLACK_TLD.has(tld)) {
-            domainScoreMap[apexDomain] += 3;
+            score += 3;
           } else if (tld.length > 6) {
           } else if (tld.length > 6) {
-            domainScoreMap[apexDomain] += 2;
+            score += 2;
           }
           }
           if (apexDomain.length >= 18) {
           if (apexDomain.length >= 18) {
-            domainScoreMap[apexDomain] += 0.5;
+            score += 0.5;
           }
           }
         }
         }
 
 
         subdomain = parsed.subdomain;
         subdomain = parsed.subdomain;
 
 
-        if (
-          subdomain
-          && !WHITELIST_MAIN_DOMAINS.has(apexDomain)
-        ) {
-          domainScoreMap[apexDomain] += calcDomainAbuseScore(subdomain, line);
+        if (subdomain) {
+          score += calcDomainAbuseScore(subdomain, line);
         }
         }
+
+        domainScoreMap[apexDomain] = score;
       }
       }
 
 
       domainCountMap.forEach((count, apexDomain) => {
       domainCountMap.forEach((count, apexDomain) => {
+        const score = domainScoreMap[apexDomain];
         if (
         if (
         // !WHITELIST_MAIN_DOMAINS.has(apexDomain)
         // !WHITELIST_MAIN_DOMAINS.has(apexDomain)
-          (domainScoreMap[apexDomain] >= 24)
-          || (domainScoreMap[apexDomain] >= 16 && count >= 7)
-          || (domainScoreMap[apexDomain] >= 13 && count >= 11)
-          || (domainScoreMap[apexDomain] >= 5 && count >= 14)
-          || (domainScoreMap[apexDomain] >= 3 && count >= 21)
-          || (domainScoreMap[apexDomain] >= 1 && count >= 60)
+          (score >= 24)
+          || (score >= 16 && count >= 7)
+          || (score >= 13 && count >= 11)
+          || (score >= 5 && count >= 14)
+          || (score >= 3 && count >= 21)
+          || (score >= 1 && count >= 60)
         ) {
         ) {
           domainArr.push('.' + apexDomain);
           domainArr.push('.' + apexDomain);
         }
         }

+ 1 - 1
Build/lib/parse-filter/filters.ts

@@ -196,7 +196,7 @@ export function parse($line: string, result: [string, ParseType], includeThirdPa
     return result;
     return result;
   }
   }
 
 
-  const filter = NetworkFilter.parse(line);
+  const filter = NetworkFilter.parse(line, false);
   if (filter) {
   if (filter) {
     if (
     if (
       // filter.isCosmeticFilter() // always false
       // filter.isCosmeticFilter() // always false