浏览代码

Feat: validate domains are alive (and w/ CI)

SukkaW 1 年之前
父节点
当前提交
0bc901cdf8
共有 9 个文件被更改,包括 351 次插入10 次删除
  1. 24 0
      .github/workflows/check-source-domain.yml
  2. 3 7
      .github/workflows/main.yml
  3. 1 1
      .gitignore
  4. 22 0
      Build/mod.d.ts
  5. 230 0
      Build/validate-domain-alive.ts
  6. 1 1
      Source/domainset/cdn.conf
  7. 6 0
      package.json
  8. 63 0
      pnpm-lock.yaml
  9. 1 1
      tsconfig.json

+ 24 - 0
.github/workflows/check-source-domain.yml

@@ -0,0 +1,24 @@
+name: Check Domain Availability
+on:
+  # manual trigger only
+  workflow_dispatch:
+
+jobs:
+  build:
+    name: Build
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+        with:
+          persist-credentials: false
+      - uses: pnpm/action-setup@v4
+        with:
+          run_install: false
+      - uses: actions/setup-node@v4
+        with:
+          node-version-file: ".node-version"
+          cache: "pnpm"
+      - run: pnpm install
+      - run: pnpm run node Build/validate-domain-alive.ts

+ 3 - 7
.github/workflows/main.yml

@@ -16,16 +16,13 @@ jobs:
     runs-on: ubuntu-latest
 
     steps:
-      - name: Checkout
-        uses: actions/checkout@v4
+      - uses: actions/checkout@v4
         with:
           persist-credentials: false
       - uses: pnpm/action-setup@v4
-        name: Install pnpm
         with:
           run_install: false
-      - name: Use Node.js
-        uses: actions/setup-node@v4
+      - uses: actions/setup-node@v4
         with:
           node-version-file: ".node-version"
           cache: "pnpm"
@@ -54,8 +51,7 @@ jobs:
             ${{ runner.os }}-v3-${{ steps.date.outputs.year }}-${{ steps.date.outputs.month }}-
             ${{ runner.os }}-v3-${{ steps.date.outputs.year }}-
             ${{ runner.os }}-v3-
-      - name: Install dependencies
-        run: pnpm install
+      - run: pnpm install
       - run: pnpm run build
       - name: Pre-deploy check
         # If the public directory doesn't exist, the build should fail.

+ 1 - 1
.gitignore

@@ -4,4 +4,4 @@ node_modules
 .wireit
 .cache
 public
-tmp*
+tmp.*

+ 22 - 0
Build/mod.d.ts

@@ -0,0 +1,22 @@
+import 'dns2';
+
+// Module augmentation for 'dns2': declares the DoH client factory and the helper
+// types that Build/validate-domain-alive.ts refers to as DNS2.DOHClient,
+// DNS2.DnsResolver and DNS2.$DnsResponse.
+// NOTE(review): presumably these are missing from @types/dns2 — confirm before removing.
+declare module 'dns2' {
+  import DNS from 'dns2';
+
+  declare namespace DNS {
+    /** Options accepted by `DOHClient`. */
+    interface DoHClientOption {
+      /** @example dns.google.com */
+      dns: string,
+      /** @description whether to use HTTP or HTTPS */
+      http: boolean
+    }
+
+    /** Async lookup function: takes a name and a question type, resolves to `T` (`DnsResponse` by default). */
+    export type DnsResolver<T = DnsResponse> = (name: string, type: PacketQuestion) => Promise<T>;
+
+    /** Builds a resolver from the given DoH options. */
+    declare function DOHClient(opt: DoHClientOption): DnsResolver;
+
+    /** Exported alias of `DnsResponse`, so other modules can name it as `DNS.$DnsResponse`. */
+    export type $DnsResponse = DnsResponse;
+  }
+
+  export = DNS;
+}

+ 230 - 0
Build/validate-domain-alive.ts

@@ -0,0 +1,230 @@
+import DNS2 from 'dns2';
+import { readFileByLine } from './lib/fetch-text-by-line';
+import { processLine } from './lib/process-line';
+import tldts from 'tldts';
+import { looseTldtsOpt } from './constants/loose-tldts-opt';
+import { fdir as Fdir } from 'fdir';
+import { SOURCE_DIR } from './constants/dir';
+import path from 'node:path';
+import { newQueue } from '@henrygd/queue';
+import asyncRetry from 'async-retry';
+import * as whoiser from 'whoiser';
+import picocolors from 'picocolors';
+
+/**
+ * DNS-over-HTTPS upstreams, each paired with a dns2 DoH client bound to it.
+ * `resolve()` picks one entry at random for every attempt.
+ *
+ * The map callback is annotated with a mutable tuple return type: tuples
+ * produced with `as const` are `readonly` and are not assignable to the
+ * declared `Array<[string, DNS2.DnsResolver]>`.
+ */
+const dohServers: Array<[string, DNS2.DnsResolver]> = [
+  '8.8.8.8',
+  '8.8.4.4',
+  '1.0.0.1',
+  '1.1.1.1',
+  '162.159.36.1',
+  '162.159.46.1',
+  '101.101.101.101', // TWNIC
+  '185.222.222.222', // DNS.SB
+  '45.11.45.11', // DNS.SB
+  '9.9.9.10', // Quad9 unfiltered
+  '149.112.112.10', // Quad9 unfiltered
+  '208.67.222.2', // OpenDNS sandbox (unfiltered)
+  '208.67.220.2', // OpenDNS sandbox (unfiltered)
+  '94.140.14.140', // AdGuard unfiltered
+  '94.140.14.141', // AdGuard unfiltered
+  // '76.76.2.0', // ControlD unfiltered, path not /dns-query
+  // '76.76.10.0', // ControlD unfiltered, path not /dns-query
+  '193.110.81.0', // dns0.eu
+  '185.253.5.0', // dns0.eu
+  'dns.nextdns.io',
+  'wikimedia-dns.org',
+  // 'ordns.he.net',
+  'dns.mullvad.net'
+  // 'ada.openbld.net',
+  // 'dns.rabbitdns.org'
+].map((server): [string, DNS2.DnsResolver] => [
+  server,
+  DNS2.DOHClient({
+    dns: server,
+    // false selects HTTPS rather than plain HTTP
+    http: false
+  })
+]);
+
+// Shared task queue created with a concurrency of 8.
+const queue = newQueue(8);
+
+/**
+ * Failure of a single DoH query attempt.
+ * `server` records which upstream was asked when the attempt failed.
+ */
+class DnsError extends Error {
+  name = 'DnsError';
+
+  constructor(
+    readonly message: string,
+    public readonly server: string
+  ) {
+    super(message);
+  }
+}
+
+/** A dns2 response tagged with the DoH server that produced it. */
+interface DnsResponse extends DNS2.$DnsResponse {
+  dns: string
+}
+
+/**
+ * Runs one DNS query through a randomly chosen entry of `dohServers`.
+ *
+ * A failing attempt is rethrown as a `DnsError` naming the server that was
+ * asked, and the whole attempt (including a fresh random pick) is retried via
+ * `asyncRetry` with `retries: 5`. If it still fails, the error is logged
+ * together with the query arguments and rethrown to the caller.
+ */
+const resolve: DNS2.DnsResolver<DnsResponse> = async (...args) => {
+  const queryOnce = async (): Promise<DnsResponse> => {
+    const pick = Math.floor(Math.random() * dohServers.length);
+    const [dohServer, dohClient] = dohServers[pick];
+
+    let resp: DNS2.$DnsResponse;
+    try {
+      resp = await dohClient(...args);
+    } catch (e) {
+      throw new DnsError((e as Error).message, dohServer);
+    }
+
+    return { ...resp, dns: dohServer };
+  };
+
+  try {
+    return await asyncRetry(queryOnce, { retries: 5 });
+  } catch (e) {
+    console.log('[doh error]', ...args, e);
+    throw e;
+  }
+};
+
+// Entry point: list the files under Source/domainset and Source/non_ip, then
+// run the matching checker over every file and wait for all of them.
+(async () => {
+  const crawl = (subdir: string) => new Fdir()
+    .withFullPaths()
+    .crawl(SOURCE_DIR + path.sep + subdir)
+    .withPromise();
+
+  const domainSets = await crawl('domainset');
+  const domainRules = await crawl('non_ip');
+
+  const jobs = [
+    ...domainSets.map(runAgainstDomainset),
+    ...domainRules.map(runAgainstRuleset)
+  ];
+  await Promise.all(jobs);
+
+  console.log('done');
+})();
+
+/** Liveness verdicts recorded so far, keyed by domain name. */
+const domainAliveMap = new Map<string, boolean>();
+
+/**
+ * Decides whether an apex (registrable) domain still exists.
+ *
+ * Order of checks:
+ *  1. a verdict already stored in `domainAliveMap` is returned as-is;
+ *  2. any NS answer means the domain is alive;
+ *  3. otherwise whois is consulted: a "No match for" text or an empty result
+ *     means dead, any other non-empty result means alive.
+ *
+ * A whois lookup that throws is reported as alive (benefit of the doubt) and
+ * is deliberately NOT cached, so a later call can try again.
+ *
+ * @param apexDomain the apex domain to check
+ * @returns a `[apexDomain, alive]` tuple
+ * @throws whatever `resolve` throws once the NS query has exhausted its retries
+ */
+async function isApexDomainAlive(apexDomain: string): Promise<[string, boolean]> {
+  const cached = domainAliveMap.get(apexDomain);
+  if (cached !== undefined) {
+    return [apexDomain, cached];
+  }
+
+  const resp = await resolve(apexDomain, 'NS');
+
+  if (resp.answers.length > 0) {
+    console.log(picocolors.green('[domain alive]'), 'NS record', apexDomain);
+    // Remember positive verdicts too, so other names under the same apex
+    // do not repeat the NS query.
+    domainAliveMap.set(apexDomain, true);
+    return [apexDomain, true];
+  }
+
+  let whois;
+
+  try {
+    whois = await whoiser.domain(apexDomain);
+  } catch (e) {
+    console.log('[whois fail]', 'whois error', { domain: apexDomain }, e);
+    return [apexDomain, true];
+  }
+
+  if (Object.keys(whois).length > 0) {
+    // TODO: this is a workaround for https://github.com/LayeredStudio/whoiser/issues/117
+    if ('text' in whois && (whois.text as string[]).some(value => value.includes('No match for'))) {
+      console.log(picocolors.red('[domain dead]'), 'whois no match', { domain: apexDomain });
+      domainAliveMap.set(apexDomain, false);
+      return [apexDomain, false];
+    }
+
+    console.log(picocolors.green('[domain alive]'), 'recorded in whois', apexDomain);
+    domainAliveMap.set(apexDomain, true);
+    return [apexDomain, true];
+  }
+
+  // Only reached when the whois result has no own enumerable keys; dump it for debugging.
+  if (!('dns' in whois)) {
+    console.log({ whois });
+  }
+
+  console.log(picocolors.red('[domain dead]'), 'whois no match', { domain: apexDomain });
+  domainAliveMap.set(apexDomain, false);
+  return [apexDomain, false];
+}
+
+/** In-flight whole-domain checks keyed by rule domain; maintained by `runAgainstRuleset`. */
+const domainMutex = new Map<string, Promise<[string, boolean]>>();
+
+/**
+ * In-flight apex checks keyed by apex domain.
+ *
+ * Kept separate from `domainMutex`: that map holds promises of `isDomainAlive`
+ * itself, so looking the apex check up there could hand a call its own pending
+ * promise and leave it waiting forever.
+ */
+const apexDomainMutex = new Map<string, Promise<[string, boolean]>>();
+
+/**
+ * Checks whether a domain is alive.
+ *
+ * The apex domain is checked first (through the shared queue, one in-flight
+ * check per apex). When the apex is alive and `isSuffix` is false, the domain
+ * itself must additionally have an A or AAAA record.
+ *
+ * @param domain the domain to check; a single leading `.` is stripped
+ * @param isSuffix true when the entry covers a whole suffix, in which case the
+ *   A/AAAA lookup on the name itself is skipped
+ * @returns a `[domain, alive]` tuple; names without a recognisable apex domain
+ *   are reported as alive
+ * @throws whatever the apex check or `resolve` throws once retries are exhausted
+ */
+export async function isDomainAlive(domain: string, isSuffix: boolean): Promise<[string, boolean]> {
+  if (domain[0] === '.') {
+    domain = domain.slice(1);
+  }
+
+  const apexDomain = tldts.getDomain(domain, looseTldtsOpt);
+  if (!apexDomain) {
+    console.log('[domain invalid]', 'no apex domain', { domain });
+    return [domain, true];
+  }
+
+  let apexDomainAlivePromise = apexDomainMutex.get(apexDomain);
+  if (!apexDomainAlivePromise) {
+    apexDomainAlivePromise = queue
+      .add(() => isApexDomainAlive(apexDomain))
+      // clear the entry on failure as well, so a rejected check is not reused
+      .finally(() => {
+        apexDomainMutex.delete(apexDomain);
+      });
+    apexDomainMutex.set(apexDomain, apexDomainAlivePromise);
+  }
+  const [, apexDomainAlive] = await apexDomainAlivePromise;
+
+  // NOTE(review): isApexDomainAlive reads domainAliveMap too, so a dead verdict
+  // stored below for a name that is itself an apex domain also short-circuits
+  // later apex checks — confirm that is intended.
+  if (!apexDomainAlive) {
+    domainAliveMap.set(domain, false);
+    return [domain, false];
+  }
+
+  if (!isSuffix) {
+    const aRecords = await resolve(domain, 'A');
+    if (aRecords.answers.length === 0) {
+      const aaaaRecords = await resolve(domain, 'AAAA');
+      if (aaaaRecords.answers.length === 0) {
+        console.log(picocolors.red('[domain dead]'), 'no A/AAAA records', { domain, a: aRecords.dns, aaaa: aaaaRecords.dns });
+        domainAliveMap.set(domain, false);
+        return [domain, false];
+      }
+    }
+  }
+
+  domainAliveMap.set(domain, true);
+  return [domain, true];
+}
+
+/**
+ * Checks every `DOMAIN` / `DOMAIN-SUFFIX` rule of one ruleset file.
+ *
+ * `isDomainAlive` is called directly rather than through `queue.add`: it
+ * already schedules its apex check on that same queue, and holding a queue
+ * slot while waiting for another task of the same queue deadlocks as soon as
+ * every slot is taken by such a waiter.
+ *
+ * @param filepath path of the ruleset file to read
+ * @returns the `[domain, alive]` results of the checks started for this file;
+ *   a domain that already has a check in flight is not checked again
+ */
+export async function runAgainstRuleset(filepath: string) {
+  const promises: Array<Promise<[string, boolean]>> = [];
+
+  for await (const l of readFileByLine(filepath)) {
+    const line = processLine(l);
+    if (!line) continue;
+    const [type, domain] = line.split(',');
+    switch (type) {
+      case 'DOMAIN-SUFFIX':
+      case 'DOMAIN': {
+        // rule line without a value after the comma
+        if (!domain) break;
+
+        if (!domainMutex.has(domain)) {
+          const promise = isDomainAlive(domain, type === 'DOMAIN-SUFFIX')
+            // release the slot on failure as well as on success
+            .finally(() => {
+              domainMutex.delete(domain);
+            });
+          domainMutex.set(domain, promise);
+          promises.push(promise);
+        }
+        break;
+      }
+      // no default
+      // case 'DOMAIN-KEYWORD': {
+      //   break;
+      // }
+      // no default
+    }
+  }
+
+  return Promise.all(promises);
+}
+
+/**
+ * Checks every entry of one domainset file.
+ *
+ * Each non-empty processed line is handed to `isDomainAlive`; an entry that
+ * starts with `.` is treated as a suffix entry.
+ *
+ * @param filepath path of the domainset file to read
+ * @returns the `[domain, alive]` results, in file order
+ */
+export async function runAgainstDomainset(filepath: string) {
+  const pending: Array<Promise<[string, boolean]>> = [];
+
+  for await (const rawLine of readFileByLine(filepath)) {
+    const entry = processLine(rawLine);
+    if (entry) {
+      const isSuffix = entry[0] === '.';
+      pending.push(isDomainAlive(entry, isSuffix));
+    }
+  }
+
+  return Promise.all(pending);
+}

+ 1 - 1
Source/domainset/cdn.conf

@@ -683,7 +683,7 @@ image.ibb.co
 .ax1x.com
 # PostImage
 .postimg.cc
-.postimg.org
+# .postimg.org - domain locked by registry since no later than Apr. 2018 (https://web.archive.org/web/20190208000038/https://www.phpbb.com/customise/db/extension/postimage/support/topic/191346)
 # Image Proxy
 images.weserv.nl
 # Imageshack

+ 6 - 0
package.json

@@ -21,7 +21,9 @@
   "license": "ISC",
   "dependencies": {
     "@ghostery/adblocker": "^2.0.3",
+    "@henrygd/queue": "^1.0.7",
     "@jsdevtools/ez-spawn": "^3.0.4",
+    "async-retry": "^1.3.3",
     "async-sema": "^3.1.1",
     "better-sqlite3": "^11.5.0",
     "cacache": "^19.0.1",
@@ -30,6 +32,7 @@
     "cli-table3": "^0.6.5",
     "csv-parse": "^5.5.6",
     "devalue": "^5.1.1",
+    "dns2": "^2.1.0",
     "fast-cidr-tools": "^0.3.1",
     "fdir": "^6.4.2",
     "foxact": "^0.2.41",
@@ -43,6 +46,7 @@
     "tldts": "^6.1.58",
     "tldts-experimental": "^6.1.58",
     "undici": "6.20.1",
+    "whoiser": "^1.18.0",
     "why-is-node-running": "^3.2.1",
     "yaml": "^2.6.0"
   },
@@ -50,8 +54,10 @@
     "@eslint-sukka/node": "^6.9.0",
     "@swc-node/register": "^1.10.9",
     "@swc/core": "^1.7.42",
+    "@types/async-retry": "^1.4.9",
     "@types/better-sqlite3": "^7.6.11",
     "@types/cacache": "^17.0.2",
+    "@types/dns2": "^2.0.9",
     "@types/make-fetch-happen": "^10.0.4",
     "@types/mocha": "^10.0.9",
     "@types/node": "^22.8.7",

+ 63 - 0
pnpm-lock.yaml

@@ -19,9 +19,15 @@ importers:
       '@ghostery/adblocker':
         specifier: ^2.0.3
         version: 2.0.3
+      '@henrygd/queue':
+        specifier: ^1.0.7
+        version: 1.0.7
       '@jsdevtools/ez-spawn':
         specifier: ^3.0.4
         version: 3.0.4
+      async-retry:
+        specifier: ^1.3.3
+        version: 1.3.3
       async-sema:
         specifier: ^3.1.1
         version: 3.1.1
@@ -46,6 +52,9 @@ importers:
       devalue:
         specifier: ^5.1.1
         version: 5.1.1
+      dns2:
+        specifier: ^2.1.0
+        version: 2.1.0
       fast-cidr-tools:
         specifier: ^0.3.1
         version: 0.3.1
@@ -85,6 +94,9 @@ importers:
       undici:
         specifier: 6.20.1
         version: 6.20.1(patch_hash=yuj5uy4vvwj67xoliq5togiyme)
+      whoiser:
+        specifier: ^1.18.0
+        version: 1.18.0
       why-is-node-running:
         specifier: ^3.2.1
         version: 3.2.1
@@ -101,12 +113,18 @@ importers:
       '@swc/core':
         specifier: ^1.7.42
         version: 1.7.42
+      '@types/async-retry':
+        specifier: ^1.4.9
+        version: 1.4.9
       '@types/better-sqlite3':
         specifier: ^7.6.11
         version: 7.6.11
       '@types/cacache':
         specifier: ^17.0.2
         version: 17.0.2
+      '@types/dns2':
+        specifier: ^2.0.9
+        version: 2.0.9
       '@types/make-fetch-happen':
         specifier: ^10.0.4
         version: 10.0.4
@@ -246,6 +264,9 @@ packages:
   '@ghostery/adblocker@2.0.3':
     resolution: {integrity: sha512-b6sbsYzfwWeFpvOSs8VMiBc+d39xvErpLz8pxCJIyOAiDD41NgT72sDHlTNZkeYYHq2fKe4sArsjUMjCyjtI6A==}
 
+  '@henrygd/queue@1.0.7':
+    resolution: {integrity: sha512-Jmt/iO6yDlz9UYGILkm/Qzi/ckkEiTNZcqDvt3QFLE4OThPeiCj6tKsynHFm/ppl8RumWXAx1dZPBPiRPaaGig==}
+
   '@humanfs/core@0.19.1':
     resolution: {integrity: sha512-5DyQ4+1JEUzejeK1JGICcideyfUbGixgS9jNgex5nqkW+cY7WZhxBigmieN5Qnw9ZosSNVC9KQKyb+GUaGyKUA==}
     engines: {node: '>=18.18.0'}
@@ -507,6 +528,9 @@ packages:
   '@tybys/wasm-util@0.9.0':
     resolution: {integrity: sha512-6+7nlbMVX/PVDCwaIQ8nTOPveOcFLSt8GcXdx8hD0bt39uWxYT88uXzqTd4fTvqta7oeUJqudepapKNt2DYJFw==}
 
+  '@types/async-retry@1.4.9':
+    resolution: {integrity: sha512-s1ciZQJzRh3708X/m3vPExr5KJlzlZJvXsKpbtE2luqNcbROr64qU+3KpJsYHqWMeaxI839OvXf9PrUSw1Xtyg==}
+
   '@types/better-sqlite3@7.6.11':
     resolution: {integrity: sha512-i8KcD3PgGtGBLl3+mMYA8PdKkButvPyARxA7IQAd6qeslht13qxb1zzO8dRCtE7U3IoJS782zDBAeoKiM695kg==}
 
@@ -516,6 +540,9 @@ packages:
   '@types/chrome@0.0.279':
     resolution: {integrity: sha512-wl0IxQ2OQiMazPZM5LimHQ7Jwd72/O8UvvzyptplXT2S4eUqXH5C0n8S+v8PtKhyX89p0igCPpNy3Bwksyk57g==}
 
+  '@types/dns2@2.0.9':
+    resolution: {integrity: sha512-+eiPSuo/KfvaaW7DiMf/vrt9mhxbZCCgxoCa/c2qMVSuZQus4BzBuKRh8XBwcUXWAtvI8QuZOp13UODJNq9DXg==}
+
   '@types/eslint@9.6.1':
     resolution: {integrity: sha512-FXx2pKgId/WyYo2jXw63kk7/+TY7u7AziEJxJAnSFzHlqTAS3Ync6SvgYAN/k4/PQpnnVuzoMuVnByKK2qp0ag==}
 
@@ -723,6 +750,9 @@ packages:
   argparse@2.0.1:
     resolution: {integrity: sha512-8+9WqebbFzpX9OR+Wa6O29asIogeRMzcGtAINdpMHHyAg10f05aSFVBbcEqGf/PXw1EjAZ+q2/bEBg3DvurK3Q==}
 
+  async-retry@1.3.3:
+    resolution: {integrity: sha512-wfr/jstw9xNi/0teMHrRW7dsz3Lt5ARhYNZ2ewpadnhaIp5mbALhOAP+EAdsC7t4Z6wqsDVv9+W6gm1Dk9mEyw==}
+
   async-sema@3.1.1:
     resolution: {integrity: sha512-tLRNUXati5MFePdAk8dw7Qt7DpxPB60ofAgn8WRhW6a2rcimZnYBP9oxHiv0OHy+Wz7kPMG+t4LGdt31+4EmGg==}
 
@@ -929,6 +959,9 @@ packages:
     resolution: {integrity: sha512-uIFDxqpRZGZ6ThOk84hEfqWoHx2devRFvpTZcTHur85vImfaxUbTW9Ryh4CpCuDnToOP1CEtXKIgytHBPVff5A==}
     engines: {node: '>=0.3.1'}
 
+  dns2@2.1.0:
+    resolution: {integrity: sha512-m27K11aQalRbmUs7RLaz6aPyceLjAoqjPRNTdE7qUouQpl+PC8Bi67O+i9SuJUPbQC8dxFrczAxfmTPuTKHNkw==}
+
   doctrine@3.0.0:
     resolution: {integrity: sha512-yS+Q5i3hBf7GBkd4KG8a7eBNNWNGLTaEwwYWUijIYM7zrlYDM0BFXHjjPWlWZ1Rg7UaddZeIDmi9jF3HmqiQ2w==}
     engines: {node: '>=6.0.0'}
@@ -1677,6 +1710,10 @@ packages:
     resolution: {integrity: sha512-9LkiTwjUh6rT555DtE9rTX+BKByPfrMzEAtnlEtdEwr3Nkffwiihqe2bWADg+OQRjt9gl6ICdmB/ZFDCGAtSow==}
     engines: {node: '>= 4'}
 
+  retry@0.13.1:
+    resolution: {integrity: sha512-XQBQ3I8W1Cge0Seh+6gjj03LbmRFWuoszgK9ooCpwYIrhhoO80pfq4cUkU5DkknwfOfFteRwlZ56PYOGYyFWdg==}
+    engines: {node: '>= 4'}
+
   reusify@1.0.4:
     resolution: {integrity: sha512-U9nH88a3fc/ekCF1l0/UP1IosiuIjyTh7hBvXVMHYgVcfGvt897Xguj2UOLDeI5BG2m7/uwyaLVT6fbtCwTyzw==}
     engines: {iojs: '>=1.0.0', node: '>=0.10.0'}
@@ -1926,6 +1963,10 @@ packages:
     engines: {node: '>= 8'}
     hasBin: true
 
+  whoiser@1.18.0:
+    resolution: {integrity: sha512-QRIGreBuouc8d9i+UVMFqYJSiG7gaoaGX8nKugYDGqnuNLLgjDBwmlKODOIGHveBawza3Kfkk/OuM9VsTUYwaA==}
+    engines: {node: '>=15.0.0'}
+
   why-is-node-running@3.2.1:
     resolution: {integrity: sha512-Tb2FUhB4vUsGQlfSquQLYkApkuPAFQXGFzxWKHHumVz2dK+X1RUm/HnID4+TfIGYJ1kTcwOaCk/buYCEJr6YjQ==}
     engines: {node: '>=20.11'}
@@ -2107,6 +2148,8 @@ snapshots:
       '@types/firefox-webext-browser': 120.0.4
       tldts-experimental: 6.1.58
 
+  '@henrygd/queue@1.0.7': {}
+
   '@humanfs/core@0.19.1': {}
 
   '@humanfs/node@0.16.6':
@@ -2351,6 +2394,10 @@ snapshots:
       tslib: 2.8.0
     optional: true
 
+  '@types/async-retry@1.4.9':
+    dependencies:
+      '@types/retry': 0.12.5
+
   '@types/better-sqlite3@7.6.11':
     dependencies:
       '@types/node': 22.8.7
@@ -2364,6 +2411,10 @@ snapshots:
       '@types/filesystem': 0.0.36
       '@types/har-format': 1.2.16
 
+  '@types/dns2@2.0.9':
+    dependencies:
+      '@types/node': 22.8.7
+
   '@types/eslint@9.6.1':
     dependencies:
       '@types/estree': 1.0.6
@@ -2604,6 +2655,10 @@ snapshots:
 
   argparse@2.0.1: {}
 
+  async-retry@1.3.3:
+    dependencies:
+      retry: 0.13.1
+
   async-sema@3.1.1: {}
 
   asynckit@0.4.0: {}
@@ -2807,6 +2862,8 @@ snapshots:
 
   diff@5.2.0: {}
 
+  dns2@2.1.0: {}
+
   doctrine@3.0.0:
     dependencies:
       esutils: 2.0.3
@@ -3653,6 +3710,8 @@ snapshots:
 
   retry@0.12.0: {}
 
+  retry@0.13.1: {}
+
   reusify@1.0.4: {}
 
   rimraf@5.0.10:
@@ -3905,6 +3964,10 @@ snapshots:
     dependencies:
       isexe: 2.0.0
 
+  whoiser@1.18.0:
+    dependencies:
+      punycode: 2.3.1
+
   why-is-node-running@3.2.1: {}
 
   word-wrap@1.2.5: {}

+ 1 - 1
tsconfig.json

@@ -17,6 +17,6 @@
   "include": [
     "./Source/**/*.js",
     "./Build/**/*.ts",
-    "Source/**/*.ts"
+    "./Source/**/*.ts"
   ]
 }