Browse Source

Add stable domain sort

SukkaW 2 years ago
parent
commit
975aa326ef

+ 2 - 1
Build/build-domestic-ruleset.js

@@ -5,6 +5,7 @@ const { readFileByLine } = require('./lib/fetch-remote-text-by-line');
 const { processLine } = require('./lib/process-line');
 const { processLine } = require('./lib/process-line');
 const { withBannerArray } = require('./lib/with-banner');
 const { withBannerArray } = require('./lib/with-banner');
 const { compareAndWriteFile } = require('./lib/string-array-compare');
 const { compareAndWriteFile } = require('./lib/string-array-compare');
+const domainSorter = require('./lib/stable-sort-domain');
 
 
 (async () => {
 (async () => {
   const rl = readFileByLine(path.resolve(__dirname, '../Source/non_ip/domestic.conf'));
   const rl = readFileByLine(path.resolve(__dirname, '../Source/non_ip/domestic.conf'));
@@ -20,7 +21,7 @@ const { compareAndWriteFile } = require('./lib/string-array-compare');
     ...Object.entries(DOMESTICS)
     ...Object.entries(DOMESTICS)
       .filter(([key]) => key !== 'SYSTEM')
       .filter(([key]) => key !== 'SYSTEM')
       .flatMap(([, { domains }]) => domains)
       .flatMap(([, { domains }]) => domains)
-      .sort()
+      .sort(domainSorter)
       .map((domain) => `DOMAIN-SUFFIX,${domain}`)
       .map((domain) => `DOMAIN-SUFFIX,${domain}`)
   );
   );
 
 

+ 9 - 6
Build/build-phishing-domainset.js

@@ -1,9 +1,10 @@
-const tldts = require('tldts');
+const { parse } = require('tldts');
 const { processFilterRules } = require('./lib/parse-filter.js');
 const { processFilterRules } = require('./lib/parse-filter.js');
 const path = require('path');
 const path = require('path');
 const { withBannerArray } = require('./lib/with-banner.js');
 const { withBannerArray } = require('./lib/with-banner.js');
 const { compareAndWriteFile } = require('./lib/string-array-compare');
 const { compareAndWriteFile } = require('./lib/string-array-compare');
 const { processLine } = require('./lib/process-line.js');
 const { processLine } = require('./lib/process-line.js');
+const domainSorter = require('./lib/stable-sort-domain');
 
 
 const WHITELIST_DOMAIN = new Set([
 const WHITELIST_DOMAIN = new Set([
   'w3s.link',
   'w3s.link',
@@ -13,7 +14,7 @@ const WHITELIST_DOMAIN = new Set([
   'business.site',
   'business.site',
   'page.link', // Firebase URL Shortener
   'page.link', // Firebase URL Shortener
   'fleek.cool',
   'fleek.cool',
-  'notion.site' 
+  'notion.site'
 ]);
 ]);
 const BLACK_TLD = new Set([
 const BLACK_TLD = new Set([
   'xyz',
   'xyz',
@@ -68,7 +69,9 @@ const BLACK_TLD = new Set([
 
 
     const domain = line.charCodeAt(0) === 46 ? line.slice(1) : line;
     const domain = line.charCodeAt(0) === 46 ? line.slice(1) : line;
 
 
-    const apexDomain = tldts.getDomain(domain, { allowPrivateDomains: true });
+    const parsed = parse(domain, { allowPrivateDomains: true });
+
+    const apexDomain = parsed.domain;
 
 
     if (apexDomain) {
     if (apexDomain) {
       if (WHITELIST_DOMAIN.has(apexDomain)) {
       if (WHITELIST_DOMAIN.has(apexDomain)) {
@@ -94,7 +97,7 @@ const BLACK_TLD = new Set([
         domainCountMap[apexDomain] += (isPhishingDomainMockingAmazon ? 4.5 : 0.5);
         domainCountMap[apexDomain] += (isPhishingDomainMockingAmazon ? 4.5 : 0.5);
       }
       }
 
 
-      const tld = tldts.getPublicSuffix(domain, { allowPrivateDomains: true });
+      const tld = parsed.publicSuffix;
       if (!tld || !BLACK_TLD.has(tld)) continue;
       if (!tld || !BLACK_TLD.has(tld)) continue;
 
 
       domainCountMap[apexDomain] += 1;
       domainCountMap[apexDomain] += 1;
@@ -114,7 +117,7 @@ const BLACK_TLD = new Set([
         }
         }
 
 
         if (domainCountMap[apexDomain] < 5) {
         if (domainCountMap[apexDomain] < 5) {
-          const subdomain = tldts.getSubdomain(domain, { allowPrivateDomains: true });
+          const subdomain = parsed.subdomain;
           if (subdomain && subdomain.includes('.')) {
           if (subdomain && subdomain.includes('.')) {
             domainCountMap[apexDomain] += 1.5;
             domainCountMap[apexDomain] += 1.5;
           }
           }
@@ -134,7 +137,7 @@ const BLACK_TLD = new Set([
     }
     }
   });
   });
 
 
-  results.sort();
+  results.sort(domainSorter);
 
 
   await compareAndWriteFile(
   await compareAndWriteFile(
     withBannerArray(
     withBannerArray(

+ 2 - 23
Build/build-reject-domainset.js

@@ -4,7 +4,6 @@ const fse = require('fs-extra');
 
 
 const { resolve: pathResolve } = require('path');
 const { resolve: pathResolve } = require('path');
 const { processHosts, processFilterRules } = require('./lib/parse-filter');
 const { processHosts, processFilterRules } = require('./lib/parse-filter');
-const { getDomain } = require('tldts');
 const Trie = require('./lib/trie');
 const Trie = require('./lib/trie');
 
 
 const { HOSTS, ADGUARD_FILTERS, PREDEFINED_WHITELIST, PREDEFINED_ENFORCED_BACKLIST } = require('./lib/reject-data-source');
 const { HOSTS, ADGUARD_FILTERS, PREDEFINED_WHITELIST, PREDEFINED_ENFORCED_BACKLIST } = require('./lib/reject-data-source');
@@ -14,6 +13,7 @@ const { processLine } = require('./lib/process-line');
 const { domainDeduper } = require('./lib/domain-deduper');
 const { domainDeduper } = require('./lib/domain-deduper');
 const createKeywordFilter = require('./lib/aho-corasick');
 const createKeywordFilter = require('./lib/aho-corasick');
 const { readFileByLine } = require('./lib/fetch-remote-text-by-line');
 const { readFileByLine } = require('./lib/fetch-remote-text-by-line');
+const domainSorter = require('./lib/stable-sort-domain');
 
 
 /** Whitelists */
 /** Whitelists */
 const filterRuleWhitelistDomainSets = new Set(PREDEFINED_WHITELIST);
 const filterRuleWhitelistDomainSets = new Set(PREDEFINED_WHITELIST);
@@ -188,29 +188,8 @@ const domainSuffixSet = new Set();
   /** @type {Record<string, number>} */
   /** @type {Record<string, number>} */
   const rejectDomainsStats = {};
   const rejectDomainsStats = {};
 
 
-  const sorter = (a, b) => {
-    if (a.domain > b.domain) {
-      return 1;
-    }
-    if (a.domain < b.domain) {
-      return -1;
-    }
-    if (a.v > b.v) {
-      return 1;
-    }
-    if (a.v < b.v) {
-      return -1;
-    }
-    return 0;
-  };
   const sortedDomainSets = dudupedDominArray
   const sortedDomainSets = dudupedDominArray
-    .map((v) => {
-      const domain = getDomain(v[0] === '.' ? v.slice(1) : v) || v;
-      rejectDomainsStats[domain] = (rejectDomainsStats[domain] || 0) + 1;
-      return { v, domain };
-    })
-    .sort(sorter)
-    .map((i) => i.v);
+    .sort(domainSorter);
 
 
   await Promise.all([
   await Promise.all([
     compareAndWriteFile(
     compareAndWriteFile(

+ 2 - 1
Build/build-speedtest-domainset.js

@@ -3,6 +3,7 @@ const { domainDeduper } = require('./lib/domain-deduper');
 const path = require('path');
 const path = require('path');
 const { withBannerArray } = require('./lib/with-banner.js');
 const { withBannerArray } = require('./lib/with-banner.js');
 const { compareAndWriteFile } = require('./lib/string-array-compare');
 const { compareAndWriteFile } = require('./lib/string-array-compare');
+const domainSorter = require('./lib/stable-sort-domain');
 
 
 const { Sema } = require('async-sema');
 const { Sema } = require('async-sema');
 const s = new Sema(2);
 const s = new Sema(2);
@@ -100,7 +101,7 @@ const querySpeedtestApi = async (keyword) => {
     }
     }
   }
   }
 
 
-  const reduped = domainDeduper(Array.from(domains)).sort();
+  const reduped = domainDeduper(Array.from(domains)).sort(domainSorter);
 
 
   await compareAndWriteFile(
   await compareAndWriteFile(
     withBannerArray(
     withBannerArray(

+ 0 - 10
Build/lib/parse-filter.js

@@ -399,16 +399,6 @@ async function processFilterRules(filterRulesUrl, fallbackUrls, includeThirdPart
   };
   };
 }
 }
 
 
-/**
- * @param {string[]} data
- */
-function preprocessFullDomainSetBeforeUsedAsWorkerData(data) {
-  return data
-    .filter(domain => domain[0] === '.')
-    .sort((a, b) => a.length - b.length);
-}
-
 module.exports.processDomainLists = processDomainLists;
 module.exports.processDomainLists = processDomainLists;
 module.exports.processHosts = processHosts;
 module.exports.processHosts = processHosts;
 module.exports.processFilterRules = processFilterRules;
 module.exports.processFilterRules = processFilterRules;
-module.exports.preprocessFullDomainSetBeforeUsedAsWorkerData = preprocessFullDomainSetBeforeUsedAsWorkerData;

+ 104 - 0
Build/lib/stable-sort-domain.js

@@ -0,0 +1,104 @@
+// @ts-check
+const tldts = require('tldts');
+
+const cache1 = Object.create(null);
+/**
+ * @param {string} url
+ * @returns {ReturnType<typeof tldts.parse>}
+ */
+// eslint-disable-next-line no-return-assign -- cache
+const parse = (url) => (cache1[url] ||= tldts.parse(url, { allowPrivateDomains: true }));
+
+/**
+ * @param {string} a
+ * @param {string} b
+ * @returns {0 | 1 | -1}
+ */
+const domainSorter = (a, b) => {
+  if (a === b) return 0;
+
+  const aParsed = parse(a);
+  const bParsed = parse(b);
+
+  const aSuffix = aParsed.publicSuffix;
+  const bSuffix = bParsed.publicSuffix;
+
+  if (bSuffix !== aSuffix) {
+    if (bSuffix == null) {
+      return 1;
+    }
+    if (aSuffix == null) {
+      return -1;
+    }
+
+    for (let i = 0, l = aSuffix.length; i < l; i++) {
+      if (bSuffix[i] == null) {
+        return 1;
+      }
+
+      if (aSuffix[i] < bSuffix[i]) {
+        return -1;
+      }
+
+      if (aSuffix[i] > bSuffix[i]) {
+        return 1;
+      }
+    }
+  }
+
+  const aDomainWithoutSuffix = aParsed.domainWithoutSuffix;
+  const bDomainWithoutSuffix = bParsed.domainWithoutSuffix;
+
+  if (aDomainWithoutSuffix !== bDomainWithoutSuffix) {
+    if (bDomainWithoutSuffix == null) {
+      return 1;
+    }
+    if (aDomainWithoutSuffix == null) {
+      return -1;
+    }
+
+    for (let i = 0, l = aDomainWithoutSuffix.length; i < l; i++) {
+      if (bDomainWithoutSuffix[i] == null) {
+        return 1;
+      }
+
+      if (aDomainWithoutSuffix[i] < bDomainWithoutSuffix[i]) {
+        return -1;
+      }
+
+      if (aDomainWithoutSuffix[i] > bDomainWithoutSuffix[i]) {
+        return 1;
+      }
+    }
+  }
+
+  const aSubdomain = aParsed.subdomain;
+  const bSubdomain = bParsed.subdomain;
+
+  if (aSubdomain !== bSubdomain) {
+    if (bSubdomain == null) {
+      return 1;
+    }
+    if (aSubdomain == null) {
+      return -1;
+    }
+
+    for (let i = 0, l = aSubdomain.length; i < l; i++) {
+      if (bSubdomain[i] == null) {
+        return 1;
+      }
+
+      if (aSubdomain[i] < bSubdomain[i]) {
+        return -1;
+      }
+
+      if (aSubdomain[i] > bSubdomain[i]) {
+        return 1;
+      }
+    }
+  }
+
+  return 0;
+};
+
+module.exports = domainSorter;

+ 26 - 24
List/domainset/download.conf

@@ -2,21 +2,23 @@
 .1fichier.info
 .1fichier.info
 .nitro.download
 .nitro.download
 
 
-# >> SourceForge
+# Microsoft .NET Runtime
+download.visualstudio.microsoft.com
+# SourceForge
 downloads.sourceforge.net
 downloads.sourceforge.net
 .dl.sourceforge.net
 .dl.sourceforge.net
-# >> Atlassian
+# Atlassian
 product-downloads.atlassian.com
 product-downloads.atlassian.com
-# >> Mokee
+# Mokee
 .download.mokeedev.com
 .download.mokeedev.com
-# >> Pixel Experience
+# Pixel Experience
 get.pixelexperience.org
 get.pixelexperience.org
 download.pixelexperience.org
 download.pixelexperience.org
-# >> MEGA
+# MEGA
 .mega.nz
 .mega.nz
 .mega.io
 .mega.io
 .mega.co.nz
 .mega.co.nz
-# >> Filen
+# Filen
 down.filen.net
 down.filen.net
 down.filen-1.net
 down.filen-1.net
 down.filen-2.net
 down.filen-2.net
@@ -24,44 +26,44 @@ down.filen-3.net
 down.filen-4.net
 down.filen-4.net
 down.filen-5.net
 down.filen-5.net
 down.filen.io
 down.filen.io
-# >> APKMirror
+# APKMirror
 downloadr2.apkmirror.com
 downloadr2.apkmirror.com
-# >> Parallels, Inc.
+# Parallels, Inc.
 download.parallels.com
 download.parallels.com
-# >> OrbStack
+# OrbStack
 cdn-updates.orbstack.dev
 cdn-updates.orbstack.dev
-# >> VSCode
+# VSCode
 update.code.visualstudio.com
 update.code.visualstudio.com
 download.visualstudio.microsoft.com
 download.visualstudio.microsoft.com
 az764295.vo.msecnd.net
 az764295.vo.msecnd.net
-# >> XMind
+# XMind
 dl2.xmind.net
 dl2.xmind.net
 dl3.xmind.net
 dl3.xmind.net
-# >> PostMan
+# PostMan
 dl.pstmn.io
 dl.pstmn.io
-# >> Surge
+# Surge
 dl.nssurge.com
 dl.nssurge.com
-# >> Docker
+# Docker
 desktop.docker.com
 desktop.docker.com
-# >> Setapp
+# Setapp
 dl.devmate.com
 dl.devmate.com
 store.setapp.com
 store.setapp.com
-# >> Parsec
+# Parsec
 builds.parsec.app
 builds.parsec.app
-# >> Sketch
+# Sketch
 download.sketch.com
 download.sketch.com
-# >> Wireshark
+# Wireshark
 .dl.wireshark.org
 .dl.wireshark.org
-# >> Mozilla
+# Mozilla
 download.mozilla.org
 download.mozilla.org
-# >> AnyDesk
+# AnyDesk
 download.anydesk.com
 download.anydesk.com
-# >> Arc
+# Arc
 releases.arc.net
 releases.arc.net
-# >> App Uninstaller & Cleaner
+# App Uninstaller & Cleaner
 download.nektony.com
 download.nektony.com
-# >> Beeper
+# Beeper
 download.beeper.com
 download.beeper.com
 download.todesktop.com
 download.todesktop.com
-# >> Motrix
+# Motrix
 dl.motrix.app
 dl.motrix.app

+ 2 - 0
Source/domainset/cdn.conf

@@ -317,6 +317,8 @@ amp.azure.net
 
 
 # >> CodeSandbox
 # >> CodeSandbox
 uploads.codesandbox.io
 uploads.codesandbox.io
+screenshots.codesandbox.io
+prod-packager-packages.codesandbox.io
 pkg.csb.dev
 pkg.csb.dev
 # Sandpack
 # Sandpack
 sandpack-cdn-staging.blazingly.io
 sandpack-cdn-staging.blazingly.io

+ 2 - 0
Source/non_ip/cdn.conf

@@ -57,4 +57,6 @@ DOMAIN-SUFFIX,s3.us-west-2.amazonaws.com
 DOMAIN-KEYWORD,web-assets.zendesk
 DOMAIN-KEYWORD,web-assets.zendesk
 # >> Cloudinary
 # >> Cloudinary
 DOMAIN-KEYWORD,-res.cloudinary.com
 DOMAIN-KEYWORD,-res.cloudinary.com
+# >> Algolia
+DOMAIN-KEYWORD,dsn.algolia.net
 # --- [AWS S3 Replace Me] ---
 # --- [AWS S3 Replace Me] ---