Browse Source

Perf: tldts allow loose input

SukkaW 1 year ago
parent
commit
3d676289f3
3 changed files with 3 additions and 75 deletions
  1. + 0 - 64
      Build/build-internal-cdn-rules.ts
  2. + 0 - 6
      Build/index.ts
  3. + 3 - 5
      Build/lib/get-phishing-domains.ts

+ 0 - 64
Build/build-internal-cdn-rules.ts

@@ -1,64 +0,0 @@
-import path from 'path';
-import { processLine } from './lib/process-line';
-import { readFileByLine } from './lib/fetch-text-by-line';
-import { sortDomains } from './lib/stable-sort-domain';
-import { task } from './trace';
-import { compareAndWriteFile } from './lib/create-file';
-import { domainDeduper } from './lib/domain-deduper';
-import { sort } from './lib/timsort';
-
-const escapeRegExp = (string = '') => string.replaceAll(/[$()*+.?[\\\]^{|}]/g, '\\$&');
-
-const processLocalDomainSet = async (domainSetPath: string, set: Set<string>) => {
-  for await (const l of readFileByLine(domainSetPath)) {
-    const line = processLine(l);
-    if (line) {
-      set.add(line[0] === '.' ? line.slice(1) : line);
-    }
-  }
-};
-
-const processLocalRuleSet = async (ruleSetPath: string, set: Set<string>, keywords: Set<string>) => {
-  for await (const line of readFileByLine(ruleSetPath)) {
-    if (line.startsWith('DOMAIN-SUFFIX,')) {
-      set.add(line.slice(14));
-    } else if (line.startsWith('DOMAIN,')) {
-      set.add(line.slice(7));
-    } else if (line.startsWith('DOMAIN-KEYWORD')) {
-      keywords.add(escapeRegExp(line.slice(15)));
-    } else if (line.includes('USER-AGENT,') || line.includes('PROCESS-NAME,') || line.includes('URL-REGEX,') || line.includes('DOMAIN-WILDCARD')) {
-      // do nothing
-    } else if (processLine(line)) {
-      console.warn('[drop line from ruleset]', line);
-    }
-  }
-};
-
-export const buildInternalCDNDomains = task(import.meta.path, async (span) => {
-  const proxySet = new Set<string>();
-  const proxyKeywords = new Set<string>();
-
-  await Promise.all([
-    processLocalRuleSet(path.resolve(import.meta.dir, '../List/non_ip/cdn.conf'), proxySet, proxyKeywords),
-    processLocalRuleSet(path.resolve(import.meta.dir, '../List/non_ip/global.conf'), proxySet, proxyKeywords),
-    processLocalRuleSet(path.resolve(import.meta.dir, '../List/non_ip/my_proxy.conf'), proxySet, proxyKeywords),
-    processLocalRuleSet(path.resolve(import.meta.dir, '../List/non_ip/my_plus.conf'), proxySet, proxyKeywords),
-    processLocalRuleSet(path.resolve(import.meta.dir, '../List/non_ip/stream.conf'), proxySet, proxyKeywords),
-    processLocalRuleSet(path.resolve(import.meta.dir, '../List/non_ip/telegram.conf'), proxySet, proxyKeywords),
-    processLocalDomainSet(path.resolve(import.meta.dir, '../List/domainset/cdn.conf'), proxySet),
-    processLocalDomainSet(path.resolve(import.meta.dir, '../List/domainset/download.conf'), proxySet)
-  ]);
-
-  return compareAndWriteFile(
-    span,
-    [
-      ...sortDomains(domainDeduper(Array.from(proxySet))).map(i => `SUFFIX,${i}`),
-      ...sort(Array.from(proxyKeywords)).map(i => `REGEX,${i}`)
-    ],
-    path.resolve(import.meta.dir, '../Internal/cdn.txt')
-  );
-});
-
-if (import.meta.main) {
-  buildInternalCDNDomains();
-}

+ 0 - 6
Build/index.ts

@@ -9,7 +9,6 @@ import { buildRejectDomainSet } from './build-reject-domainset';
 import { buildTelegramCIDR } from './build-telegram-cidr';
 import { buildChnCidr } from './build-chn-cidr';
 import { buildSpeedtestDomainSet } from './build-speedtest-domainset';
-import { buildInternalCDNDomains } from './build-internal-cdn-rules';
 import { buildInternalReverseChnCIDR } from './build-internal-reverse-chn-cidr';
 import { buildDomesticRuleset } from './build-domestic-ruleset';
 import { buildStreamService } from './build-stream-service';
@@ -51,10 +50,6 @@ process.on('unhandledRejection', (reason) => {
     const buildTelegramCIDRPromise = downloadPreviousBuildPromise.then(() => buildTelegramCIDR(rootSpan));
     const buildChnCidrPromise = downloadPreviousBuildPromise.then(() => buildChnCidr(rootSpan));
     const buildSpeedtestDomainSetPromise = downloadPreviousBuildPromise.then(() => buildSpeedtestDomainSet(rootSpan));
-    const buildInternalCDNDomainsPromise = Promise.all([
-      buildCommonPromise,
-      buildCdnConfPromise
-    ]).then(() => buildInternalCDNDomains(rootSpan));
 
     const buildInternalReverseChnCIDRPromise = buildInternalReverseChnCIDR(rootSpan);
 
@@ -86,7 +81,6 @@ process.on('unhandledRejection', (reason) => {
       buildTelegramCIDRPromise,
       buildChnCidrPromise,
       buildSpeedtestDomainSetPromise,
-      buildInternalCDNDomainsPromise,
       buildInternalReverseChnCIDRPromise,
       buildInternalReverseChnCIDRPromise,
       // buildInternalChnDomainsPromise,

+ 3 - 5
Build/lib/get-phishing-domains.ts

@@ -118,20 +118,18 @@ export const getPhishingDomains = (parentSpan: Span) => parentSpan.traceChild('g
     for (let i = 0, len = domainArr.length; i < len; i++) {
       const line = domainArr[i];
 
-      const safeGorhillLine = line[0] === '.' ? line.slice(1) : line;
-
       const {
         publicSuffix: tld,
         domain: apexDomain,
         subdomain
-      } = parse(safeGorhillLine, looseTldtsOpt);
+      } = parse(line, looseTldtsOpt);
 
       if (!tld) {
-        console.log(picocolors.yellow('[phishing domains] E0001'), 'missing tld', { line, safeGorhillLine, tld });
+        console.log(picocolors.yellow('[phishing domains] E0001'), 'missing tld', { line, tld });
         continue;
       }
       if (!apexDomain) {
-        console.log(picocolors.yellow('[phishing domains] E0002'), 'missing domain', { line, safeGorhillLine, apexDomain });
+        console.log(picocolors.yellow('[phishing domains] E0002'), 'missing domain', { line, apexDomain });
         continue;
       }