ソースを参照

Perf: use tldts-experimental when possible

SukkaW 1 年間 前
コミット
1288460c48

+ 4 - 1
Build/build-reject-domainset.ts

@@ -11,7 +11,10 @@ import createKeywordFilter from './lib/aho-corasick';
 import { readFileByLine, readFileIntoProcessedArray } from './lib/fetch-text-by-line';
 import { sortDomains } from './lib/stable-sort-domain';
 import { task } from './trace';
-import * as tldts from 'tldts';
+// tldts-experimental is much faster than tldts, but slightly less accurate
+// (since it is hash-based). The result is still deterministic, though, which
+// is enough when creating a simple stat of reject hosts.
+import * as tldts from 'tldts-experimental';
 import { SHARED_DESCRIPTION } from './lib/constants';
 import { getPhishingDomains } from './lib/get-phishing-domains';
 

+ 2 - 2
Build/build-sgmodule-redirect.ts

@@ -1,7 +1,7 @@
 import path from 'path';
 import { task } from './trace';
 import { compareAndWriteFile } from './lib/create-file';
-import * as tldts from 'tldts';
+import { getHostname } from 'tldts';
 
 function escapeRegExp(string = '') {
   const reRegExpChar = /[$()*+.?[\\\]^{|}]/g;
@@ -122,7 +122,7 @@ export const buildRedirectModule = task(import.meta.path, async (span) => {
   const domains = Array.from(
     new Set(
       [
-        ...REDIRECT_MIRROR.map(([from]) => tldts.getHostname(from, { detectIp: false })),
+        ...REDIRECT_MIRROR.map(([from]) => getHostname(from, { detectIp: false })),
         ...REDIRECT_FAKEWEBSITES.flatMap(([from]) => [from, `www.${from}`])
       ]
     )

+ 2 - 3
Build/build-speedtest-domainset.ts

@@ -4,11 +4,10 @@ import { createRuleset } from './lib/create-file';
 import { sortDomains } from './lib/stable-sort-domain';
 
 import { Sema } from 'async-sema';
-import * as tldts from 'tldts';
+import { getHostname } from 'tldts';
 import { task } from './trace';
 import { fetchWithRetry } from './lib/fetch-retry';
 import { SHARED_DESCRIPTION } from './lib/constants';
-import { getGorhillPublicSuffixPromise } from './lib/get-gorhill-publicsuffix';
 import picocolors from 'picocolors';
 import { fetchRemoteTextByLine } from './lib/fetch-text-by-line';
 import { processLine } from './lib/process-line';
@@ -64,7 +63,7 @@ const querySpeedtestApi = async (keyword: string): Promise<Array<string | null>>
         }
       })).then(r => r.json()).then((data: Array<{ url: string }>) => data.reduce<string[]>(
         (prev, cur) => {
-          const hn = tldts.getHostname(cur.url, { detectIp: false });
+          const hn = getHostname(cur.url, { detectIp: false });
           if (hn) {
             prev.push(hn);
           }

+ 2 - 2
Build/lib/get-phishing-domains.ts

@@ -1,6 +1,6 @@
 import { getGorhillPublicSuffixPromise } from './get-gorhill-publicsuffix';
 import { processDomainLists } from './parse-filter';
-import * as tldts from 'tldts';
+import { getSubdomain } from 'tldts';
 import { TTL } from './cache-filesystem';
 
 import { add as SetAdd } from 'mnemonist/set';
@@ -177,7 +177,7 @@ export function calcDomainAbuseScore(line: string) {
     }
   }
 
-  const subdomain = tldts.getSubdomain(line, { detectIp: false });
+  const subdomain = getSubdomain(line, { detectIp: false });
 
   if (subdomain) {
     if (subdomain.slice(1).includes('.')) {

+ 2 - 2
Build/lib/normalize-domain.ts

@@ -1,10 +1,10 @@
-import * as tldts from 'tldts';
+import { parse as tldtsParse } from 'tldts';
 import { isProbablyIpv4 } from './is-fast-ip';
 export const normalizeDomain = (domain: string) => {
   if (!domain) return null;
   if (isProbablyIpv4(domain)) return null;
 
-  const parsed = tldts.parse(domain, { allowPrivateDomains: true, detectIp: false });
+  const parsed = tldtsParse(domain, { allowPrivateDomains: true, detectIp: false });
   // if (parsed.isIp) return null;
   if (!parsed.hostname) return null;
   if (!parsed.isIcann && !parsed.isPrivate) return null;

+ 2 - 2
Build/lib/parse-dnsmasq.ts

@@ -1,8 +1,8 @@
 import { fetchRemoteTextByLine } from './fetch-text-by-line';
-import { parse } from 'tldts';
+import { parse as tldtsParse } from 'tldts';
 
 const isDomainLoose = (domain: string): boolean => {
-  const { isIcann, isPrivate, isIp } = parse(domain);
+  const { isIcann, isPrivate, isIp } = tldtsParse(domain);
   return !!(!isIp && (isIcann || isPrivate));
 };
 

+ 16 - 11
Build/lib/stable-sort-domain.ts

@@ -1,4 +1,7 @@
-import * as tldts from 'tldts';
+// tldts-experimental is much faster than tldts, but slightly less accurate
+// (since it is hash-based). The result is still deterministic, though, which
+// is enough when sorting.
+import * as tldts from 'tldts-experimental';
 import { sort } from './timsort';
 
 export const compare = (a: string, b: string) => {
@@ -6,11 +9,11 @@ export const compare = (a: string, b: string) => {
   return (a.length - b.length) || a.localeCompare(b);
 };
 
-const tldtsOpt = {
-  extractHostname: false,
+const tldtsOpt: Parameters<typeof tldts.getDomain>[1] = {
   allowPrivateDomains: false,
-  detectIp: false,
+  extractHostname: false,
   validateHostname: false,
+  detectIp: false,
   mixedInputs: false
 };
 
@@ -36,14 +39,16 @@ export const sortDomains = (inputs: string[]) => {
     const main_domain_a = domainMap.get(a)!;
     const main_domain_b = domainMap.get(b)!;
 
-    let t = compare(main_domain_a, main_domain_b);
+    let t = compare(
+      main_domain_a,
+      main_domain_b
+    ) || compare(
+      /** subdomain_a */ subdomainMap.get(a)!,
+      /** subdomain_b */ subdomainMap.get(b)!
+    );
+    if (t !== 0) return t;
 
-    if (t === 0) {
-      const subdomain_a = subdomainMap.get(a)!;
-      const subdomain_b = subdomainMap.get(b)!;
-      t = compare(subdomain_a, subdomain_b);
-    }
-    if (t === 0 && (a !== main_domain_a || b !== main_domain_b)) {
+    if (a !== main_domain_a || b !== main_domain_b) {
       t = compare(a, b);
     }
 

BIN
bun.lockb


+ 2 - 1
package.json

@@ -29,7 +29,8 @@
     "punycode": "^2.3.1",
     "table": "^6.8.2",
     "tar-stream": "^3.1.7",
-    "tldts": "^6.1.19"
+    "tldts": "^6.1.19",
+    "tldts-experimental": "^6.1.21"
   },
   "devDependencies": {
     "@eslint-sukka/node": "6.0.0-beta.3",