Browse Source

Refactor: improve stable sort domains

SukkaW 1 year ago
parent
commit
22d738d99d

+ 3 - 4
Build/build-internal-cdn-rules.ts

@@ -39,8 +39,7 @@ export const buildInternalCDNDomains = task(import.meta.path, async (span) => {
   const proxySet = new Set<string>();
   const proxySet = new Set<string>();
   const proxyKeywords = new Set<string>();
   const proxyKeywords = new Set<string>();
 
 
-  const gorhill = (await Promise.all([
-    getGorhillPublicSuffixPromise(),
+  await Promise.all([
     processLocalRuleSet(path.resolve(import.meta.dir, '../List/non_ip/cdn.conf'), proxySet, proxyKeywords),
     processLocalRuleSet(path.resolve(import.meta.dir, '../List/non_ip/cdn.conf'), proxySet, proxyKeywords),
     processLocalRuleSet(path.resolve(import.meta.dir, '../List/non_ip/global.conf'), proxySet, proxyKeywords),
     processLocalRuleSet(path.resolve(import.meta.dir, '../List/non_ip/global.conf'), proxySet, proxyKeywords),
     processLocalRuleSet(path.resolve(import.meta.dir, '../List/non_ip/my_proxy.conf'), proxySet, proxyKeywords),
     processLocalRuleSet(path.resolve(import.meta.dir, '../List/non_ip/my_proxy.conf'), proxySet, proxyKeywords),
@@ -49,12 +48,12 @@ export const buildInternalCDNDomains = task(import.meta.path, async (span) => {
     processLocalRuleSet(path.resolve(import.meta.dir, '../List/non_ip/telegram.conf'), proxySet, proxyKeywords),
     processLocalRuleSet(path.resolve(import.meta.dir, '../List/non_ip/telegram.conf'), proxySet, proxyKeywords),
     processLocalDomainSet(path.resolve(import.meta.dir, '../List/domainset/cdn.conf'), proxySet),
     processLocalDomainSet(path.resolve(import.meta.dir, '../List/domainset/cdn.conf'), proxySet),
     processLocalDomainSet(path.resolve(import.meta.dir, '../List/domainset/download.conf'), proxySet)
     processLocalDomainSet(path.resolve(import.meta.dir, '../List/domainset/download.conf'), proxySet)
-  ]))[0];
+  ]);
 
 
   return compareAndWriteFile(
   return compareAndWriteFile(
     span,
     span,
     [
     [
-      ...sortDomains(domainDeduper(Array.from(proxySet)), gorhill).map(i => `SUFFIX,${i}`),
+      ...sortDomains(domainDeduper(Array.from(proxySet))).map(i => `SUFFIX,${i}`),
       ...sort(Array.from(proxyKeywords)).map(i => `REGEX,${i}`)
       ...sort(Array.from(proxyKeywords)).map(i => `REGEX,${i}`)
     ],
     ],
     path.resolve(import.meta.dir, '../Internal/cdn.txt')
     path.resolve(import.meta.dir, '../Internal/cdn.txt')

+ 1 - 3
Build/build-reject-domainset.ts

@@ -21,8 +21,6 @@ import { setAddFromArray } from './lib/set-add-from-array';
 import { sort } from './lib/timsort';
 import { sort } from './lib/timsort';
 
 
 export const buildRejectDomainSet = task(import.meta.path, async (span) => {
 export const buildRejectDomainSet = task(import.meta.path, async (span) => {
-  const gorhill = await getGorhillPublicSuffixPromise();
-
   /** Whitelists */
   /** Whitelists */
   const filterRuleWhitelistDomainSets = new Set(PREDEFINED_WHITELIST);
   const filterRuleWhitelistDomainSets = new Set(PREDEFINED_WHITELIST);
 
 
@@ -178,7 +176,7 @@ export const buildRejectDomainSet = task(import.meta.path, async (span) => {
       'Sukka\'s Ruleset - Reject Base',
       'Sukka\'s Ruleset - Reject Base',
       description,
       description,
       new Date(),
       new Date(),
-      span.traceChildSync('sort reject domainset', () => sortDomains(dudupedDominArray, gorhill)),
+      span.traceChildSync('sort reject domainset', () => sortDomains(dudupedDominArray)),
       'domainset',
       'domainset',
       path.resolve(import.meta.dir, '../List/domainset/reject.conf'),
       path.resolve(import.meta.dir, '../List/domainset/reject.conf'),
       path.resolve(import.meta.dir, '../Clash/domainset/reject.txt')
       path.resolve(import.meta.dir, '../Clash/domainset/reject.txt')

+ 14 - 15
Build/build-speedtest-domainset.ts

@@ -83,16 +83,16 @@ const querySpeedtestApi = async (keyword: string): Promise<Array<string | null>>
   }
   }
 };
 };
 
 
-// const getPreviousSpeedtestDomainsPromise = createMemoizedPromise(async () => {
-//   const domains = new Set<string>();
-//   for await (const l of await fetchRemoteTextByLine('https://ruleset.skk.moe/List/domainset/speedtest.conf')) {
-//     const line = processLine(l);
-//     if (line) {
-//       domains.add(line);
-//     }
-//   }
-//   return domains;
-// });
+const getPreviousSpeedtestDomainsPromise = createMemoizedPromise(async () => {
+  const domains = new Set<string>();
+  for await (const l of await fetchRemoteTextByLine('https://ruleset.skk.moe/List/domainset/speedtest.conf')) {
+    const line = processLine(l);
+    if (line) {
+      domains.add(line);
+    }
+  }
+  return domains;
+});
 
 
 export const buildSpeedtestDomainSet = task(import.meta.path, async (span) => {
 export const buildSpeedtestDomainSet = task(import.meta.path, async (span) => {
   // Predefined domainset
   // Predefined domainset
@@ -183,9 +183,9 @@ export const buildSpeedtestDomainSet = task(import.meta.path, async (span) => {
     'mensura.cdn-apple.com' // From netQuality command
     'mensura.cdn-apple.com' // From netQuality command
   ]);
   ]);
 
 
-  // await span.traceChildAsync('fetch previous speedtest domainset', async () => {
-  //   SetHelpers.add(domains, await getPreviousSpeedtestDomainsPromise());
-  // });
+  await span.traceChildAsync('fetch previous speedtest domainset', async () => {
+    SetHelpers.add(domains, await getPreviousSpeedtestDomainsPromise());
+  });
 
 
   await new Promise<void>((resolve) => {
   await new Promise<void>((resolve) => {
     const pMap = ([
     const pMap = ([
@@ -245,8 +245,7 @@ export const buildSpeedtestDomainSet = task(import.meta.path, async (span) => {
     });
     });
   });
   });
 
 
-  const gorhill = await getGorhillPublicSuffixPromise();
-  const deduped = span.traceChildSync('sort result', () => sortDomains(domainDeduper(Array.from(domains)), gorhill));
+  const deduped = span.traceChildSync('sort result', () => sortDomains(domainDeduper(Array.from(domains))));
 
 
   const description = [
   const description = [
     ...SHARED_DESCRIPTION,
     ...SHARED_DESCRIPTION,

+ 1 - 2
Build/lib/stable-sort-domain.bench.ts

@@ -7,10 +7,9 @@ import { bench, group, run } from 'mitata';
 
 
 (async () => {
 (async () => {
   const data = await processLineFromReadline(await fetchRemoteTextByLine('https://osint.digitalside.it/Threat-Intel/lists/latestdomains.txt'));
   const data = await processLineFromReadline(await fetchRemoteTextByLine('https://osint.digitalside.it/Threat-Intel/lists/latestdomains.txt'));
-  const gorhill = await getGorhillPublicSuffixPromise();
 
 
   group('sortDomains', () => {
   group('sortDomains', () => {
-    bench('run', () => sortDomains(data, gorhill));
+    bench('run', () => sortDomains(data));
   });
   });
 
 
   run();
   run();

+ 74 - 33
Build/lib/stable-sort-domain.test.ts

@@ -1,51 +1,92 @@
 // eslint-disable-next-line import-x/no-unresolved -- bun
 // eslint-disable-next-line import-x/no-unresolved -- bun
 import { describe, expect, it } from 'bun:test';
 import { describe, expect, it } from 'bun:test';
 
 
-import { sortDomains } from './stable-sort-domain';
-import { getGorhillPublicSuffixPromise } from './get-gorhill-publicsuffix';
+import { compare, sortDomains } from './stable-sort-domain';
 
 
-describe('sortDomains', () => {
-  it('basic', async () => {
-    const gorhill = await getGorhillPublicSuffixPromise();
+describe('compare', () => {
+  it('basic', () => {
+    expect(
+      compare('.s3-website.ap-northeast-3.amazonaws.com', '.s3.dualstack.ap-south-1.amazonaws.com')
+    ).toBe(1);
+  });
 
 
+  it('basic', () => {
     expect(
     expect(
-      sortDomains([
-        '.s3-website.ap-northeast-3.amazonaws.com',
-        '.s3.dualstack.ap-south-1.amazonaws.com',
-        '.s3-website.af-south-1.amazonaws.com'
-      ], gorhill)
-    ).toStrictEqual(
-      sortDomains([
-        '.s3.dualstack.ap-south-1.amazonaws.com',
-        '.s3-website.ap-northeast-3.amazonaws.com',
-        '.s3-website.af-south-1.amazonaws.com'
-      ], gorhill)
-    );
+      compare('.s3-website.ap-northeast-3.amazonaws.com', '.s3.dualstack.ap-south-1.amazonaws.com')
+    ).toBe(1);
+  });
+});
+
+describe('sortDomains', () => {
+  it('basic', () => {
+    expect(sortDomains([
+      '.s3-website.ap-northeast-3.amazonaws.com',
+      '.s3.dualstack.ap-south-1.amazonaws.com',
+      '.s3-website.af-south-1.amazonaws.com'
+    ])).toStrictEqual([
+      '.s3-website.af-south-1.amazonaws.com',
+      '.s3.dualstack.ap-south-1.amazonaws.com',
+      '.s3-website.ap-northeast-3.amazonaws.com'
+    ]);
+
+    expect(sortDomains([
+      '.s3.dualstack.ap-south-1.amazonaws.com',
+      '.s3-website.ap-northeast-3.amazonaws.com',
+      '.s3-website.af-south-1.amazonaws.com'
+    ])).toStrictEqual([
+      '.s3-website.af-south-1.amazonaws.com',
+      '.s3.dualstack.ap-south-1.amazonaws.com',
+      '.s3-website.ap-northeast-3.amazonaws.com'
+    ]);
+
+    expect(sortDomains([
+      '.s3-website-us-west-2.amazonaws.com',
+      '.s3-1.amazonaws.com'
+    ])).toStrictEqual([
+      '.s3-1.amazonaws.com',
+      '.s3-website-us-west-2.amazonaws.com'
+    ]);
+
+    expect(sortDomains([
+      '.s3-1.amazonaws.com',
+      '.s3-website-us-west-2.amazonaws.com'
+    ])).toStrictEqual([
+      '.s3-1.amazonaws.com',
+      '.s3-website-us-west-2.amazonaws.com'
+    ]);
 
 
     expect(
     expect(
       sortDomains([
       sortDomains([
-        '.s3-website-us-west-2.amazonaws.com',
-        '.s3-1.amazonaws.com'
-      ], gorhill)
-    ).toStrictEqual(
-      sortDomains([
-        '.s3-1.amazonaws.com',
-        '.s3-website-us-west-2.amazonaws.com'
-      ], gorhill)
-    );
+        '.s3-deprecated.us-west-2.amazonaws.com',
+        '.s3-accesspoint.dualstack.us-west-2.amazonaws.com',
+        '.s3.dualstack.us-west-2.amazonaws.com'
+      ])
+    ).toStrictEqual([
+      '.s3.dualstack.us-west-2.amazonaws.com',
+      '.s3-deprecated.us-west-2.amazonaws.com',
+      '.s3-accesspoint.dualstack.us-west-2.amazonaws.com'
+    ]);
 
 
     expect(
     expect(
       sortDomains([
       sortDomains([
         '.s3-deprecated.us-west-2.amazonaws.com',
         '.s3-deprecated.us-west-2.amazonaws.com',
         '.s3-accesspoint.dualstack.us-west-2.amazonaws.com',
         '.s3-accesspoint.dualstack.us-west-2.amazonaws.com',
         '.s3.dualstack.us-west-2.amazonaws.com'
         '.s3.dualstack.us-west-2.amazonaws.com'
-      ], gorhill)
-    ).toStrictEqual(
+      ])
+    ).toStrictEqual([
+      '.s3.dualstack.us-west-2.amazonaws.com',
+      '.s3-deprecated.us-west-2.amazonaws.com',
+      '.s3-accesspoint.dualstack.us-west-2.amazonaws.com'
+    ]);
+
+    expect(
       sortDomains([
       sortDomains([
-        '.s3-accesspoint.dualstack.us-west-2.amazonaws.com',
-        '.s3.dualstack.us-west-2.amazonaws.com',
-        '.s3-deprecated.us-west-2.amazonaws.com'
-      ], gorhill)
-    );
+        '.ec2-25-58-215-234.us-east-2.compute.amazonaws.com',
+        '.ec2-13-58-215-234.us-east-2.compute.amazonaws.com'
+      ])
+    ).toStrictEqual([
+      '.ec2-13-58-215-234.us-east-2.compute.amazonaws.com',
+      '.ec2-25-58-215-234.us-east-2.compute.amazonaws.com'
+    ]);
   });
   });
 });
 });

+ 8 - 6
Build/lib/stable-sort-domain.ts

@@ -1,7 +1,7 @@
-import type { PublicSuffixList } from '@gorhill/publicsuffixlist';
+import * as tldts from 'tldts';
 import { sort } from './timsort';
 import { sort } from './timsort';
 
 
-const compare = (a: string, b: string) => {
+export const compare = (a: string, b: string) => {
   if (a === b) return 0;
   if (a === b) return 0;
 
 
   const aLen = a.length;
   const aLen = a.length;
@@ -27,11 +27,13 @@ const compare = (a: string, b: string) => {
   return 0;
   return 0;
 };
 };
 
 
-export const sortDomains = (inputs: string[], gorhill: PublicSuffixList) => {
+const tldtsOpt = { allowPrivateDomains: false, detectIp: false, validateHostname: false };
+
+export const sortDomains = (inputs: string[]) => {
   const domains = inputs.reduce<Map<string, string>>((acc, cur) => {
   const domains = inputs.reduce<Map<string, string>>((acc, cur) => {
     if (!acc.has(cur)) {
     if (!acc.has(cur)) {
-      const topD = gorhill.getDomain(cur[0] === '.' ? cur.slice(1) : cur);
-      acc.set(cur, topD);
+      const topD = tldts.getDomain(cur, tldtsOpt);
+      acc.set(cur, topD ?? cur);
     };
     };
     return acc;
     return acc;
   }, new Map());
   }, new Map());
@@ -42,7 +44,7 @@ export const sortDomains = (inputs: string[], gorhill: PublicSuffixList) => {
     const $a = domains.get(a)!;
     const $a = domains.get(a)!;
     const $b = domains.get(b)!;
     const $b = domains.get(b)!;
 
 
-    if ($a === a && $b === b) {
+    if (a === $a && b === $b) {
       return compare(a, b);
       return compare(a, b);
     }
     }
     return compare($a, $b) || compare(a, b);
     return compare($a, $b) || compare(a, b);