瀏覽代碼

Perf: speed up build

SukkaW 2 年之前
父節點
當前提交
d5850aa84b

+ 10 - 3
Build/build-anti-bogus-domain.js

@@ -6,7 +6,7 @@ const { fetchRemoteTextAndCreateReadlineInterface, readFileByLine } = require('.
 const { processLine } = require('./lib/process-line');
 const { task } = require('./lib/trace-runner');
 
-const buildAntiBogusDomain = task(__filename, async () => {
+const getBogusNxDomainIPs = async () => {
   /** @type {string[]} */
   const res = [];
   for await (const line of await fetchRemoteTextAndCreateReadlineInterface('https://raw.githubusercontent.com/felixonmars/dnsmasq-china-list/master/bogus-nxdomain.china.conf')) {
@@ -14,20 +14,27 @@ const buildAntiBogusDomain = task(__filename, async () => {
       res.push(line.replace('bogus-nxdomain=', ''));
     }
   }
+  return res;
+};
 
+const buildAntiBogusDomain = task(__filename, async () => {
   const filePath = path.resolve(__dirname, '../Source/ip/reject.conf');
 
+  const bogusIpPromise = getBogusNxDomainIPs();
+
   /** @type {string[]} */
   const result = [];
   for await (const line of readFileByLine(filePath)) {
     if (line === '# --- [Anti Bogus Domain Replace Me] ---') {
-      res.forEach(ip => {
+      (await bogusIpPromise).forEach(ip => {
         if (isIPv4(ip)) {
           result.push(`IP-CIDR,${ip}/32,no-resolve`);
         } else if (isIPv6(ip)) {
           result.push(`IP-CIDR6,${ip}/128,no-resolve`);
         }
       });
+
+      continue;
     } else {
       const l = processLine(line);
       if (l) {
@@ -47,7 +54,7 @@ const buildAntiBogusDomain = task(__filename, async () => {
     ' - https://github.com/felixonmars/dnsmasq-china-list'
   ];
 
-  await Promise.all(createRuleset(
+  return Promise.all(createRuleset(
     'Sukka\'s Ruleset - Anti Bogus Domain',
     description,
     new Date(),

+ 1 - 1
Build/build-apple-cdn.js

@@ -20,7 +20,7 @@ const buildAppleCdn = task(__filename, async () => {
   const ruleset = res.map(domain => `DOMAIN-SUFFIX,${domain}`);
   const domainset = res.map(i => `.${i}`);
 
-  await Promise.all([
+  return Promise.all([
     ...createRuleset(
       'Sukka\'s Ruleset - Apple CDN',
       description,

+ 10 - 4
Build/build-cdn-conf.js

@@ -9,7 +9,7 @@ const { processLine } = require('./lib/process-line');
 
 const publicSuffixPath = path.resolve(__dirname, '../node_modules/.cache/public_suffix_list_dat.txt');
 
-const buildCdnConf = task(__filename, async () => {
+const getS3OSSDomains = async () => {
   const trie = new Trie();
 
   if (fs.existsSync(publicSuffixPath)) {
@@ -46,13 +46,19 @@ const buildCdnConf = task(__filename, async () => {
     }
   });
 
+  return S3OSSDomains;
+};
+
+const buildCdnConf = task(__filename, async () => {
   /** @type {string[]} */
   const cdnDomainsList = [];
+
+  const getS3OSSDomainsPromise = getS3OSSDomains();
+
   for await (const l of readFileByLine(path.resolve(__dirname, '../Source/non_ip/cdn.conf'))) {
     if (l === '# --- [AWS S3 Replace Me] ---') {
-      S3OSSDomains.forEach(domain => {
-        cdnDomainsList.push(`DOMAIN-SUFFIX,${domain}`);
-      });
+      (await getS3OSSDomainsPromise).forEach(domain => { cdnDomainsList.push(`DOMAIN-SUFFIX,${domain}`); });
+      continue;
     }
     const line = processLine(l);
     if (line) {

+ 6 - 14
Build/build-chn-cidr.js

@@ -3,7 +3,7 @@ const { fetchRemoteTextAndCreateReadlineInterface } = require('./lib/fetch-remot
 const { resolve: pathResolve } = require('path');
 // This should not use `createRuleset` API since we are going to generate ipcidr for Clash
 const { compareAndWriteFile, withBannerArray } = require('./lib/create-file');
-const { processLine } = require('./lib/process-line');
+const { processLineFromReadline } = require('./lib/process-line');
 const { task } = require('./lib/trace-runner');
 
 // https://github.com/misakaio/chnroutes2/issues/25
@@ -13,20 +13,12 @@ const EXCLUDE_CIDRS = [
 ];
 
 const buildChnCidr = task(__filename, async () => {
-  const { exclude: excludeCidrs } = await import('cidr-tools-wasm');
-
-  /** @type {string[]} */
-  const cidr = [];
-  for await (const line of await fetchRemoteTextAndCreateReadlineInterface('https://raw.githubusercontent.com/misakaio/chnroutes2/master/chnroutes.txt')) {
-    const l = processLine(line);
-    if (l) {
-      cidr.push(l);
-    }
-  }
+  const [{ exclude: excludeCidrs }, cidr] = await Promise.all([
+    import('cidr-tools-wasm'),
+    processLineFromReadline(await fetchRemoteTextAndCreateReadlineInterface('https://raw.githubusercontent.com/misakaio/chnroutes2/master/chnroutes.txt'))
+  ]);
 
-  console.log('Before Merge:', cidr.length);
   const filteredCidr = excludeCidrs(cidr, EXCLUDE_CIDRS, true);
-  console.log('After Merge:', filteredCidr.length);
 
   const description = [
     'License: CC BY-SA 2.0',
@@ -36,7 +28,7 @@ const buildChnCidr = task(__filename, async () => {
     'Data from https://misaka.io (misakaio @ GitHub)'
   ];
 
-  await Promise.all([
+  return Promise.all([
     compareAndWriteFile(
       withBannerArray(
         'Sukka\'s Ruleset - Mainland China IPv4 CIDR',

+ 3 - 3
Build/build-common.js

@@ -17,7 +17,7 @@ const outputSurgeDir = path.resolve(__dirname, '../List');
 const outputClashDir = path.resolve(__dirname, '../Clash');
 
 const buildCommon = task(__filename, async () => {
-  /** @type {Promise<void>[]} */
+  /** @type {Promise<unknown>[]} */
   const promises = [];
 
   const pw = new PathScurry(sourceDir);
@@ -107,7 +107,7 @@ async function transformDomainset(sourcePath, relativePath) {
     )
   ];
 
-  await Promise.all(createRuleset(
+  return Promise.all(createRuleset(
     title,
     description,
     new Date(),
@@ -140,7 +140,7 @@ async function transformRuleset(sourcePath, relativePath) {
     )
   ];
 
-  await Promise.all(createRuleset(
+  return Promise.all(createRuleset(
     title,
     description,
     new Date(),

+ 4 - 15
Build/build-domestic-ruleset.js

@@ -2,33 +2,22 @@
 const path = require('path');
 const { DOMESTICS } = require('../Source/non_ip/domestic');
 const { readFileByLine } = require('./lib/fetch-remote-text-by-line');
-const { processLine } = require('./lib/process-line');
+const { processLineFromReadline } = require('./lib/process-line');
 const { compareAndWriteFile, createRuleset } = require('./lib/create-file');
-const domainSorter = require('./lib/stable-sort-domain');
 const { task } = require('./lib/trace-runner');
 
 const buildDomesticRuleset = task(__filename, async () => {
-  const rl = readFileByLine(path.resolve(__dirname, '../Source/non_ip/domestic.conf'));
-  const results = [];
-  for await (const l of rl) {
-    const line = processLine(l);
-    if (line) {
-      results.push(line);
-    }
-  }
+  const results = await processLineFromReadline(readFileByLine(path.resolve(__dirname, '../Source/non_ip/domestic.conf')));
 
   results.push(
     ...Object.entries(DOMESTICS)
       .reduce(
         (acc, [key, { domains }]) => {
-          if (key === 'SYSTEM') {
-            return acc;
-          }
+          if (key === 'SYSTEM') return acc;
           return [...acc, ...domains];
         },
         /** @type {string[]} */([])
       )
-      .sort(domainSorter)
       .map((domain) => `DOMAIN-SUFFIX,${domain}`)
   );
 
@@ -40,7 +29,7 @@ const buildDomesticRuleset = task(__filename, async () => {
     'This file contains known addresses that are avaliable in the Mainland China.'
   ];
 
-  await Promise.all([
+  return Promise.all([
     ...createRuleset(
       'Sukka\'s Ruleset - Domestic Domains',
       rulesetDescription,

+ 11 - 4
Build/build-internal-cdn-rules.js

@@ -4,9 +4,11 @@ const path = require('path');
 const tldts = require('tldts');
 const { processLine } = require('./lib/process-line');
 const { readFileByLine } = require('./lib/fetch-remote-text-by-line');
-const domainSorter = require('./lib/stable-sort-domain');
+const { createDomainSorter } = require('./lib/stable-sort-domain');
 const { task } = require('./lib/trace-runner');
 const { compareAndWriteFile } = require('./lib/create-file');
+const { getGorhillPublicSuffixPromise } = require('./lib/get-gorhill-publicsuffix');
+const { createCachedGorhillGetDomain } = require('./lib/cached-tld-parse');
 
 /**
  * @param {string} string
@@ -19,11 +21,15 @@ const buildInternalCDNDomains = task(__filename, async () => {
   const set = new Set();
   const keywords = new Set();
 
+  const gorhill = await getGorhillPublicSuffixPromise();
+  const getDomain = createCachedGorhillGetDomain(gorhill);
+  const domainSorter = createDomainSorter(gorhill);
+
   /**
    * @param {string} input
    */
   const addApexDomain = (input) => {
-    const d = tldts.getDomain(input, { allowPrivateDomains: true });
+    const d = getDomain(input);
     if (d) {
       set.add(d);
     }
@@ -35,7 +41,8 @@ const buildInternalCDNDomains = task(__filename, async () => {
   const processLocalDomainSet = async (domainSetPath) => {
     for await (const line of readFileByLine(domainSetPath)) {
       const parsed = tldts.parse(line, { allowPrivateDomains: true });
-      if (!parsed.isIp && (parsed.isIcann || parsed.isPrivate)) {
+      if (parsed.isIp) continue;
+      if (parsed.isIcann || parsed.isPrivate) {
         if (parsed.domain) {
           set.add(parsed.domain);
         }
@@ -80,7 +87,7 @@ const buildInternalCDNDomains = task(__filename, async () => {
     fse.ensureDir(path.resolve(__dirname, '../List/internal'))
   ]);
 
-  await compareAndWriteFile(
+  return compareAndWriteFile(
     [
       ...Array.from(set).sort(domainSorter).map(i => `SUFFIX,${i}`),
       ...Array.from(keywords).sort().map(i => `REGEX,${i}`)

+ 1 - 1
Build/build-internal-chn-domains.js

@@ -11,7 +11,7 @@ const buildInternalChnDomains = task(__filename, async () => {
     fse.ensureDir(path.resolve(__dirname, '../List/internal'))
   ]);
 
-  await compareAndWriteFile(
+  return compareAndWriteFile(
     result.map(line => `SUFFIX,${line}`),
     path.resolve(__dirname, '../List/internal/accelerated-china-domains.txt')
   );

+ 7 - 13
Build/build-internal-reverse-chn-cidr.js

@@ -1,6 +1,6 @@
 // @ts-check
 const { fetchRemoteTextAndCreateReadlineInterface } = require('./lib/fetch-remote-text-by-line');
-const { processLine } = require('./lib/process-line');
+const { processLineFromReadline } = require('./lib/process-line');
 const path = require('path');
 const fse = require('fs-extra');
 const fs = require('fs');
@@ -25,16 +25,11 @@ const RESERVED_IPV4_CIDR = [
 ];
 
 const buildInternalReverseChnCIDR = task(__filename, async () => {
-  const { exclude } = await import('cidr-tools-wasm');
-
-  /** @type {string[]} */
-  const cidr = [];
-  for await (const line of await fetchRemoteTextAndCreateReadlineInterface('https://raw.githubusercontent.com/misakaio/chnroutes2/master/chnroutes.txt')) {
-    const l = processLine(line);
-    if (l) {
-      cidr.push(l);
-    }
-  }
+  const [{ exclude }, cidr] = await Promise.all([
+    import('cidr-tools-wasm'),
+    processLineFromReadline(await fetchRemoteTextAndCreateReadlineInterface('https://raw.githubusercontent.com/misakaio/chnroutes2/master/chnroutes.txt')),
+    fse.ensureDir(path.resolve(__dirname, '../List/internal'))
+  ]);
 
   const reversedCidr = exclude(
     ['0.0.0.0/0'],
@@ -42,8 +37,7 @@ const buildInternalReverseChnCIDR = task(__filename, async () => {
     true
   );
 
-  await fse.ensureDir(path.resolve(__dirname, '../List/internal'));
-  await fs.promises.writeFile(
+  return fs.promises.writeFile(
     path.resolve(__dirname, '../List/internal/reversed-chn-cidr.txt'),
     `${reversedCidr.join('\n')}\n`
   );

+ 74 - 53
Build/build-phishing-domainset.js

@@ -1,10 +1,14 @@
-const tldts = require('tldts');
+// @ts-check
 const { processFilterRules } = require('./lib/parse-filter.js');
 const path = require('path');
 const { createRuleset } = require('./lib/create-file');
 const { processLine } = require('./lib/process-line.js');
-const domainSorter = require('./lib/stable-sort-domain');
+const { createDomainSorter } = require('./lib/stable-sort-domain');
 const { traceSync, task } = require('./lib/trace-runner.js');
+const Trie = require('./lib/trie.js');
+const { getGorhillPublicSuffixPromise } = require('./lib/get-gorhill-publicsuffix.js');
+const { createCachedGorhillGetDomain } = require('./lib/cached-tld-parse.js');
+const tldts = require('tldts');
 
 const WHITELIST_DOMAIN = new Set([
   'w3s.link',
@@ -61,77 +65,94 @@ const BLACK_TLD = new Set([
 ]);
 
 const buildPhishingDomainSet = task(__filename, async () => {
-  const domainSet = Array.from((await processFilterRules(
-    'https://phishing-filter.pages.dev/phishing-filter-agh.txt'
-    // [
-    //   'https://malware-filter.gitlab.io/phishing-filter/phishing-filter-agh.txt',
-    //   'https://malware-filter.pages.dev/phishing-filter-agh.txt',
-    //   'https://phishing-filter.pages.dev/phishing-filter-agh.txt'
-    // ]
-  )).black);
+  const [{ black: domainSet }, gorhill] = await Promise.all([
+    processFilterRules(
+      'https://curbengh.github.io/phishing-filter/phishing-filter-agh.txt',
+      [
+        'https://malware-filter.gitlab.io/phishing-filter/phishing-filter-agh.txt',
+        'https://malware-filter.pages.dev/phishing-filter-agh.txt',
+        'https://phishing-filter.pages.dev/phishing-filter-agh.txt'
+      ]
+    ),
+    getGorhillPublicSuffixPromise()
+  ]);
+
+  traceSync('* whitelist', () => {
+    const trieForRemovingWhiteListed = Trie.from(domainSet);
+    WHITELIST_DOMAIN.forEach(white => {
+      trieForRemovingWhiteListed.find(`.${white}`, false).forEach(f => domainSet.delete(f));
+      if (trieForRemovingWhiteListed.has(white)) {
+        domainSet.delete(white);
+      }
+    });
+  });
+
   const domainCountMap = {};
+  const getDomain = createCachedGorhillGetDomain(gorhill);
 
   traceSync('* process domain set', () => {
-    for (let i = 0, len = domainSet.length; i < len; i++) {
-      const line = processLine(domainSet[i]);
-      if (!line) continue;
+    const domainArr = Array.from(domainSet);
 
-      const parsed = tldts.parse(line, { allowPrivateDomains: true });
-      const apexDomain = parsed.domain;
+    for (let i = 0, len = domainArr.length; i < len; i++) {
+      const line = processLine(domainArr[i]);
+      if (!line) continue;
 
-      if (apexDomain) {
-        if (WHITELIST_DOMAIN.has(apexDomain)) {
-          continue;
-        }
+      const apexDomain = getDomain(line);
+      if (!apexDomain) continue;
 
-        domainCountMap[apexDomain] ||= 0;
+      domainCountMap[apexDomain] ||= 0;
 
-        let isPhishingDomainMockingAmazon = false;
-        if (line.startsWith('.amaz')) {
-          domainCountMap[apexDomain] += 0.5;
+      const isPhishingDomainMockingCoJp = line.includes('-co-jp');
+      if (isPhishingDomainMockingCoJp) {
+        domainCountMap[apexDomain] += 0.5;
+      }
 
-          isPhishingDomainMockingAmazon = true;
+      if (line.startsWith('.amaz')) {
+        domainCountMap[apexDomain] += 0.5;
 
-          if (line.startsWith('.amazon-')) {
-            domainCountMap[apexDomain] += 4.5;
-          }
-        } else if (line.startsWith('.customer')) {
-          domainCountMap[apexDomain] += 0.25;
+        if (line.startsWith('.amazon-')) {
+          domainCountMap[apexDomain] += 4.5;
         }
-        if (line.includes('-co-jp')) {
-          domainCountMap[apexDomain] += (isPhishingDomainMockingAmazon ? 4.5 : 0.5);
+        if (isPhishingDomainMockingCoJp) {
+          domainCountMap[apexDomain] += 4;
         }
+      } else if (line.startsWith('.customer')) {
+        domainCountMap[apexDomain] += 0.25;
+      }
 
-        const tld = parsed.publicSuffix;
-        if (!tld || !BLACK_TLD.has(tld)) continue;
+      const tld = gorhill.getPublicSuffix(line[0] === '.' ? line.slice(1) : line);
+      if (!tld || !BLACK_TLD.has(tld)) continue;
 
-        domainCountMap[apexDomain] += 1;
+      domainCountMap[apexDomain] += 1;
 
-        if (line.length > 19) {
-          // Add more weight if the domain is long enough
-          if (line.length > 44) {
-            domainCountMap[apexDomain] += 3.5;
-          } else if (line.length > 34) {
-            domainCountMap[apexDomain] += 2.5;
-          } else if (line.length > 29) {
-            domainCountMap[apexDomain] += 1.5;
-          } else if (line.length > 24) {
-            domainCountMap[apexDomain] += 0.75;
-          } else if (line.length > 19) {
-            domainCountMap[apexDomain] += 0.25;
-          }
+      const lineLen = line.length;
 
-          if (domainCountMap[apexDomain] < 5) {
-            const subdomain = parsed.subdomain;
-            if (subdomain?.includes('.')) {
-              domainCountMap[apexDomain] += 1.5;
-            }
+      if (lineLen > 19) {
+        // Add more weight if the domain is long enough
+        if (lineLen > 44) {
+          domainCountMap[apexDomain] += 3.5;
+        } else if (lineLen > 34) {
+          domainCountMap[apexDomain] += 2.5;
+        } else if (lineLen > 29) {
+          domainCountMap[apexDomain] += 1.5;
+        } else if (lineLen > 24) {
+          domainCountMap[apexDomain] += 0.75;
+        } else {
+          domainCountMap[apexDomain] += 0.25;
+        }
+
+        if (domainCountMap[apexDomain] < 5) {
+          const subdomain = tldts.getSubdomain(line);
+          if (subdomain?.includes('.')) {
+            domainCountMap[apexDomain] += 1.5;
           }
         }
       }
     }
   });
 
+  const domainSorter = createDomainSorter(gorhill);
+
   const results = traceSync('* get final results', () => Object.entries(domainCountMap)
     .reduce((acc, [apexDomain, count]) => {
       if (count >= 5) {
@@ -151,7 +172,7 @@ const buildPhishingDomainSet = task(__filename, async () => {
     ' - https://gitlab.com/malware-filter/phishing-filter'
   ];
 
-  await Promise.all(createRuleset(
+  return Promise.all(createRuleset(
     'Sukka\'s Ruleset - Reject Phishing',
     description,
     new Date(),

+ 1 - 1
Build/build-public.js

@@ -28,7 +28,7 @@ const buildPublicHtml = task(__filename, async () => {
 
   const html = template(list);
 
-  await fs.promises.writeFile(path.join(publicPath, 'index.html'), html, 'utf-8');
+  return fs.promises.writeFile(path.join(publicPath, 'index.html'), html, 'utf-8');
 });
 
 module.exports.buildPublicHtml = buildPublicHtml;

+ 1 - 1
Build/build-reject-domainset.js

@@ -208,7 +208,7 @@ const buildRejectDomainSet = task(__filename, async () => {
     ...ADGUARD_FILTERS.map(filter => ` - ${Array.isArray(filter) ? filter[0] : filter}`)
   ];
 
-  await Promise.all([
+  return Promise.all([
     ...createRuleset(
       'Sukka\'s Ruleset - Reject Base',
       description,

+ 8 - 2
Build/build-speedtest-domainset.js

@@ -65,7 +65,13 @@ const buildSpeedtestDomainSet = task(__filename, async () => {
     '.speedtest.idv.tw',
     '.speedtest.frontier.com',
     '.speedtest.orange.fr',
-    '.speedtest.centurylink.net'
+    '.speedtest.centurylink.net',
+    '.srvr.bell.ca',
+    '.speedtest.contabo.net',
+    'speedtest.hk.chinamobile.com',
+    'speedtestbb.hk.chinamobile.com',
+    '.hizinitestet.com',
+    '.linknetspeedtest.net.br'
   ]);
 
   const hostnameGroups = await Promise.all([
@@ -114,7 +120,7 @@ const buildSpeedtestDomainSet = task(__filename, async () => {
     'GitHub: https://github.com/SukkaW/Surge'
   ];
 
-  await Promise.all(createRuleset(
+  return Promise.all(createRuleset(
     'Sukka\'s Ruleset - Speedtest Domains',
     description,
     new Date(),

+ 9 - 9
Build/build-telegram-cidr.js

@@ -17,14 +17,14 @@ const buildTelegramCIDR = task(__filename, async () => {
 
   for await (const line of createReadlineInterfaceFromResponse(resp)) {
     const cidr = processLine(line);
-    if (cidr) {
-      const [subnet] = cidr.split('/');
-      if (isIPv4(subnet)) {
-        results.push(`IP-CIDR,${cidr},no-resolve`);
-      }
-      if (isIPv6(subnet)) {
-        results.push(`IP-CIDR6,${cidr},no-resolve`);
-      }
+    if (!cidr) continue;
+
+    const [subnet] = cidr.split('/');
+    if (isIPv4(subnet)) {
+      results.push(`IP-CIDR,${cidr},no-resolve`);
+    }
+    if (isIPv6(subnet)) {
+      results.push(`IP-CIDR6,${cidr},no-resolve`);
     }
   }
 
@@ -40,7 +40,7 @@ const buildTelegramCIDR = task(__filename, async () => {
     ' - https://core.telegram.org/resources/cidr.txt'
   ];
 
-  await Promise.all(createRuleset(
+  return Promise.all(createRuleset(
     'Sukka\'s Ruleset - Telegram IP CIDR',
     description,
     date,

+ 2 - 9
Build/download-previous-build.js

@@ -33,7 +33,6 @@ const downloadPreviousBuild = task(__filename, async () => {
       if (!isCI) {
         allFileExists = fs.existsSync(join(__dirname, '..', line));
         if (!allFileExists) {
-          console.log(`File not exists: ${line}`);
           break;
         }
       }
@@ -73,33 +72,27 @@ const downloadPreviousBuild = task(__filename, async () => {
   await Promise.all(filesList.map(async p => {
     const src = join(extractedPath, 'Surge-gh-pages', p);
     if (await fileExists(src)) {
-      const dst = join(__dirname, '..', p);
-      console.log('Copy', { src, dst });
       return fse.copy(
         src,
         join(__dirname, '..', p),
         { overwrite: true }
       );
     }
-
-    console.log('File not exists:', src);
   }));
 
-  await fs.promises.unlink(extractedPath).catch(() => { });
+  return fs.promises.unlink(extractedPath).catch(() => { });
 });
 
 const downloadPublicSuffixList = task(__filename, async () => {
   const publicSuffixDir = resolve(__dirname, '../node_modules/.cache');
   const publicSuffixPath = join(publicSuffixDir, 'public_suffix_list_dat.txt');
 
-  console.log('Download public suffix list.');
-
   const [resp] = await Promise.all([
     fetch('https://publicsuffix.org/list/public_suffix_list.dat'),
     fse.ensureDir(publicSuffixDir)
   ]);
 
-  await pipeline(
+  return pipeline(
     Readable.fromWeb(resp.body),
     fs.createWriteStream(publicSuffixPath)
   );

+ 35 - 3
Build/index.js

@@ -1,3 +1,5 @@
+// @ts-check
+
 const { downloadPreviousBuild, downloadPublicSuffixList } = require('./download-previous-build');
 const { buildCommon } = require('./build-common');
 const { buildAntiBogusDomain } = require('./build-anti-bogus-domain');
@@ -47,7 +49,7 @@ const requireWorker = (path) => {
  * @param {WithWorker<T>} worker
  */
 const endWorker = async (worker) => {
-  const { forceExited } = worker.end();
+  const { forceExited } = await worker.end();
   if (forceExited && worker.__sukka_worker_name) {
     console.log(worker.__sukka_worker_name, 'forceExited');
   }
@@ -72,7 +74,10 @@ const endWorker = async (worker) => {
     downloadPublicSuffixListPromise
   ]).then(() => buildCdnConf());
   // build:phishing-domainset
-  const buildPhilishingDomainsetPromise = downloadPreviousBuildPromise.then(() => buildPhishingDomainSet());
+  const buildPhilishingDomainsetPromise = Promise.all([
+    downloadPreviousBuildPromise,
+    downloadPublicSuffixListPromise
+  ]).then(() => buildPhishingDomainSet());
   // build:reject-domainset
   const buildRejectDomainSetPromise = Promise.all([
     downloadPreviousBuildPromise,
@@ -87,6 +92,7 @@ const endWorker = async (worker) => {
   const buildSpeedtestDomainSetPromise = downloadPreviousBuildPromise.then(() => buildSpeedtestDomainSet());
   // build:internal-cdn-rules
   const buildInternalCDNDomainsPromise = Promise.all([
+    downloadPublicSuffixListPromise,
     buildCommonPromise,
     buildCdnConfPromise
   ]).then(() => buildInternalCDNDomains());
@@ -97,7 +103,7 @@ const endWorker = async (worker) => {
   // build:domestic-ruleset
   const buildDomesticRulesetPromise = downloadPreviousBuildPromise.then(() => buildDomesticRuleset());
 
-  await Promise.all([
+  const stats = await Promise.all([
     downloadPreviousBuildPromise,
     downloadPublicSuffixListPromise,
     buildCommonPromise,
@@ -120,4 +126,30 @@ const endWorker = async (worker) => {
     validate(),
     endWorker(buildInternalReverseChnCIDRWorker)
   ]);
+
+  printStats(stats);
 })();
+
+/**
+ * Print a text timeline of finished tasks, one row per task, ordered by start
+ * time. Each row is the task name (padded to the longest name), an indent
+ * proportional to how long after the earliest start the task began, and a bar
+ * of `=` proportional to its duration (always at least one `=`).
+ *
+ * The input array is not modified. Nothing is printed for an empty array.
+ *
+ * @param {Array<{ start: number, end: number, taskName: string }>} stats
+ * @returns {void}
+ */
+function printStats(stats) {
+  if (stats.length === 0) return;
+
+  // Sort a copy by start time so the caller's array keeps its order
+  const sorted = [...stats].sort((a, b) => a.start - b.start);
+
+  const longestTaskName = Math.max(...sorted.map(i => i.taskName.length));
+  // After sorting, the first entry holds the earliest start
+  const realStart = sorted[0].start;
+  const realEnd = Math.max(...sorted.map(i => i.end));
+
+  const totalMs = realEnd - realStart;
+
+  // Milliseconds represented by one column (the whole run spans about 160 columns).
+  // Clamp to 1: a run shorter than 160ms truncates to 0, and dividing by 0 below
+  // yields Infinity/NaN, which `| 0` turns into 0 and flattens every row.
+  const statsStep = Math.max((totalMs / 160) | 0, 1);
+
+  sorted.forEach(stat => {
+    console.log(
+      `[${stat.taskName}]${' '.repeat(longestTaskName - stat.taskName.length)}`,
+      ' '.repeat(((stat.start - realStart) / statsStep) | 0),
+      '='.repeat(Math.max(((stat.end - stat.start) / statsStep) | 0, 1))
+    );
+  });
+}

+ 2 - 4
Build/lib/cached-tld-parse.js

@@ -9,9 +9,7 @@ const sharedConfig = { allowPrivateDomains: true };
  * @param {string} domain
  * @returns {ReturnType<import('tldts').parse>}
  */
-module.exports.parse = (domain) => {
-  return cache.sync(domain, () => tldts.parse(domain, sharedConfig));
-};
+module.exports.parse = (domain) => cache.sync(domain, () => tldts.parse(domain, sharedConfig));
 
 let gothillGetDomainCache = null;
 /**
@@ -22,5 +20,5 @@ module.exports.createCachedGorhillGetDomain = (gorhill) => {
   /**
    * @param {string} domain
    */
-  return (domain) => gothillGetDomainCache.sync(domain, () => gorhill.getDomain(domain[0] === '.' ? domain.slice(1) : domain));
+  return (domain) => (/** @type {ReturnType<typeof createCache>} */ (gothillGetDomainCache)).sync(domain, () => gorhill.getDomain(domain[0] === '.' ? domain.slice(1) : domain));
 };

+ 7 - 9
Build/lib/get-gorhill-publicsuffix.js

@@ -3,13 +3,6 @@ const fs = require('fs');
 const path = require('path');
 
 const publicSuffixPath = path.resolve(__dirname, '../../node_modules/.cache/public_suffix_list_dat.txt');
-const getPublicSuffixListDat = () => {
-  if (fs.existsSync(publicSuffixPath)) {
-    return fs.promises.readFile(publicSuffixPath, 'utf-8');
-  }
-  console.log('public_suffix_list.dat not found, fetch directly from remote.');
-  return fetch('https://publicsuffix.org/list/public_suffix_list.dat').then(r => r.text());
-};
 
 const getGorhillPublicSuffix = async () => {
   const customFetch = async (url) => {
@@ -20,7 +13,12 @@ const getGorhillPublicSuffix = async () => {
   };
 
   const [publicSuffixListDat, { default: gorhill }] = await Promise.all([
-    getPublicSuffixListDat(),
+    fs.existsSync(publicSuffixPath)
+      ? fs.promises.readFile(publicSuffixPath, 'utf-8')
+      : fetch('https://publicsuffix.org/list/public_suffix_list.dat').then(r => {
+        console.log('public_suffix_list.dat not found, fetch directly from remote.');
+        return r.text();
+      }),
     import('gorhill-publicsuffixlist')
   ]);
 
@@ -30,7 +28,7 @@ const getGorhillPublicSuffix = async () => {
   return gorhill;
 };
 
-/** @type {Promise<import('gorhill-publicsuffixlist').default | null>} */
+/** @type {Promise<import('gorhill-publicsuffixlist').default> | null} */
 let gorhillPublicSuffixPromise = null;
 module.exports.getGorhillPublicSuffixPromise = () => {
   gorhillPublicSuffixPromise ||= getGorhillPublicSuffix();

+ 0 - 24
Build/lib/is-domain-loose.js

@@ -1,24 +0,0 @@
-// @ts-check
-const tldts = require('./cached-tld-parse');
-/**
- * @param {string | null | undefined} domain
- */
-module.exports.normalizeDomain = (domain) => {
-  if (!domain) {
-    return null;
-  }
-
-  const { isIcann, isPrivate, hostname, isIp } = tldts.parse(domain);
-  if (isIp) {
-    return null;
-  }
-
-  if (isIcann || isPrivate) {
-    if (hostname?.[0] === '.') {
-      return hostname.slice(1);
-    }
-    return hostname;
-  }
-
-  return null;
-};

+ 17 - 1
Build/lib/parse-filter.js

@@ -1,8 +1,8 @@
 // @ts-check
 const { fetchWithRetry } = require('./fetch-retry');
+const tldts = require('tldts');
 const { fetchRemoteTextAndCreateReadlineInterface } = require('./fetch-remote-text-by-line');
 const { NetworkFilter } = require('@cliqz/adblocker');
-const { normalizeDomain } = require('./is-domain-loose');
 const { processLine } = require('./process-line');
 const { performance } = require('perf_hooks');
 
@@ -19,6 +19,22 @@ const warnOnce = (url, isWhite, ...message) => {
   console.warn(url, isWhite ? '(white)' : '(black)', ...message);
 };
 
+/**
+ * Reduce a raw domain string to a bare hostname, or `null` when it should be ignored.
+ *
+ * Yields `null` for falsy input, for input that `tldts.parse` flags as an IP, and
+ * for input it flags as neither ICANN nor private. Otherwise yields the parsed
+ * hostname with one leading dot removed.
+ *
+ * @param {string | null | undefined} domain
+ */
+const normalizeDomain = (domain) => {
+  if (!domain) return null;
+
+  const parsed = tldts.parse(domain);
+  if (parsed.isIp || !(parsed.isIcann || parsed.isPrivate)) return null;
+
+  const { hostname } = parsed;
+  return hostname?.[0] === '.' ? hostname.slice(1) : hostname;
+};
+
 /**
  * @param {string | URL} domainListsUrl
  */

+ 17 - 1
Build/lib/process-line.js

@@ -6,7 +6,7 @@
  *
  * @param {string} line
  */
-module.exports.processLine = (line) => {
+const processLine = (line) => {
   if (!line) {
     return null;
   }
@@ -30,3 +30,19 @@ module.exports.processLine = (line) => {
 
   return trimmed;
 };
+module.exports.processLine = processLine;
+
+/**
+ * Consume every line of an async line iterator, pass each one through
+ * `processLine`, and collect the truthy results in input order.
+ *
+ * @param {import('readline').ReadLine} rl
+ */
+module.exports.processLineFromReadline = async (rl) => {
+  /** @type {string[]} */
+  const kept = [];
+  for await (const raw of rl) {
+    const processed = processLine(raw);
+    // processLine signals "skip this line" with a falsy value
+    if (!processed) continue;
+    kept.push(processed);
+  }
+  return kept;
+};

+ 7 - 2
Build/lib/trace-runner.js

@@ -40,8 +40,13 @@ module.exports.traceAsync = traceAsync;
  */
 module.exports.task = (__filename, fn, customname = null) => {
   const taskName = customname ?? path.basename(__filename, path.extname(__filename));
-  return () => {
+  return async () => {
     console.log(`🏃 [${taskName}] Start executing`);
-    return traceAsync(`✅ [${taskName}] Executed successfully`, fn);
+    // Time the task with performance.now(). Whatever fn resolves to is discarded.
+    // There is no try/catch: if fn rejects, the rejection propagates and the
+    // success line below is never logged.
+    const start = performance.now();
+    await fn();
+    const end = performance.now();
+    console.log(`✅ [${taskName}] Executed successfully: ${(end - start).toFixed(3)}ms`);
+
+    // The wrapper resolves to this timing record instead of fn's own result
+    return { start, end, taskName };
   };
 };

+ 8 - 7
Build/validate-domainset.js

@@ -59,14 +59,15 @@ const _validateRuleset = async (filePath) => {
 };
 
 const validate = task(__filename, async () => {
-  const [domainsetFiles, _rulesetFiles] = await Promise.all([
-    listDir(path.resolve(__dirname, '../List/domainset')),
-    listDir(path.resolve(__dirname, '../List/non_ip'))
-  ]);
-  await Promise.all(
-    domainsetFiles.map(file => validateDomainSet(file))
+  // Only the files listed under ../List/domainset are validated (one
+  // validateDomainSet call per file); the ruleset pass is commented out.
+  // const [domainsetFiles, _rulesetFiles] = await Promise.all([
+  //   listDir(path.resolve(__dirname, '../List/domainset')),
+  //   listDir(path.resolve(__dirname, '../List/non_ip'))
+  // ]);
+  return Promise.all([
+    listDir(path.resolve(__dirname, '../List/domainset'))
+      .then(domainsetFiles => Promise.all(domainsetFiles.map(file => validateDomainSet(file))))
     // rulesetFiles.map(file => validateRuleset(file))
-  );
+  ]);
 });
 module.exports.validate = validate;