Browse Source

Fix: improve whois matching w/ domain checking

SukkaW 1 year ago
parent
commit
57b5d2933f
3 changed files with 354 additions and 227 deletions
  1. 77 0
      Build/lib/is-domain-alive.test.ts
  2. 272 0
      Build/lib/is-domain-alive.ts
  3. 5 227
      Build/validate-domain-alive.ts

+ 77 - 0
Build/lib/is-domain-alive.test.ts

@@ -0,0 +1,77 @@
+import { describe, it } from 'mocha';
+
+import { isDomainAlive, whoisExists } from './is-domain-alive';
+import { expect } from 'expect';
+
+// Offline checks: every payload below is a whoiser-shaped "not found" answer,
+// so whoisExists must report the domain as not registered.
+describe('whoisExists', () => {
+  // Shared by the '.tunevideo.ru' and '.myqloud.com' cases, whose fixtures are identical.
+  // NOTE(review): a whois.tcinet.ru answer for a .com name looks copy-pasted — confirm.
+  const tcinetNoEntries = {
+    'whois.tcinet.ru': {
+      'Domain Status': [],
+      'Name Server': [],
+      text: [
+        '% TCI Whois Service. Terms of use:',
+        '% https://tcinet.ru/documents/whois_ru_rf.pdf (in Russian)',
+        '% https://tcinet.ru/documents/whois_su.pdf (in Russian)',
+        '',
+        'No entries found for the selected source(s).',
+        '',
+        'Last updated on 2025-01-05T11:03:01Z'
+      ]
+    }
+  };
+
+  const nicIoNotFound = {
+    'whois.nic.io': {
+      'Domain Status': [],
+      'Name Server': [],
+      '>>> Last update of WHOIS database': '2025-01-05T11:06:38Z <<<',
+      text: [
+        'Domain not found.',
+        '',
+        'Terms of Use: Access to WHOIS'
+      ]
+    }
+  };
+
+  const notRegistered: Array<[string, Parameters<typeof whoisExists>[0]]> = [
+    ['.cryptocrawler.io', nicIoNotFound],
+    ['.tunevideo.ru', tcinetNoEntries],
+    ['.myqloud.com', tcinetNoEntries]
+  ];
+
+  for (const [title, payload] of notRegistered) {
+    it(title, () => {
+      expect(whoisExists(payload)).toBe(false);
+    });
+  }
+});
+
+// These cases call isDomainAlive for real (DoH / whois lookups), hence the longer timeout.
+describe('isDomainAlive', function () {
+  this.timeout(10000);
+
+  // [domain, isSuffix, expected alive flag]
+  const cases: Array<[string, boolean, boolean]> = [
+    ['.cryptocrawler.io', true, false],
+    ['.tunevideo.ru', true, false],
+    ['.myqloud.com', true, true],
+    ['discount-deal.org', false, false]
+  ];
+
+  for (const [domain, isSuffix, expectedAlive] of cases) {
+    it(domain, async () => {
+      const [, alive] = await isDomainAlive(domain, isSuffix);
+      expect(alive).toEqual(expectedAlive);
+    });
+  }
+});

+ 272 - 0
Build/lib/is-domain-alive.ts

@@ -0,0 +1,272 @@
+import tldts from 'tldts-experimental';
+import { looseTldtsOpt } from '../constants/loose-tldts-opt';
+import picocolors from 'picocolors';
+
+import DNS2 from 'dns2';
+import asyncRetry from 'async-retry';
+import * as whoiser from 'whoiser';
+
+import { createRetrieKeywordFilter as createKeywordFilter } from 'foxts/retrie';
+
+// Promise per key. Entries are never removed, so settled (and rejected) promises stay cached.
+const mutex = new Map<string, Promise<unknown>>();
+/**
+ * Runs `fn` at most once per `key` and hands every caller the same promise.
+ *
+ * @param key identifies the piece of work to de-duplicate
+ * @param fn  starts the work; only invoked when `key` has not been seen before
+ * @returns the promise created by the first call made with `key`
+ */
+export function keyedAsyncMutexWithQueue<T>(key: string, fn: () => Promise<T>) {
+  const cached = mutex.get(key);
+  if (cached !== undefined) {
+    return cached as Promise<T>;
+  }
+  const pending = fn();
+  mutex.set(key, pending);
+  return pending;
+}
+
+/** Error thrown for a failed DoH query; `server` records which DoH server was asked. */
+class DnsError extends Error {
+  name = 'DnsError';
+  readonly message: string;
+  public readonly server: string;
+
+  constructor(message: string, server: string) {
+    super(message);
+    this.message = message;
+    this.server = server;
+  }
+}
+
+/** A dns2 response tagged with the DoH server that produced it. */
+interface DnsResponse extends DNS2.$DnsResponse {
+  // name/IP of the DoH server that answered (set by createResolve)
+  dns: string
+}
+
+// General-purpose DoH pool as [server, client] pairs; createResolve picks one at random
+// per attempt. Commented-out entries are kept on purpose with the reason they are disabled.
+const dohServers: Array<[string, DNS2.DnsResolver]> = ([
+  '8.8.8.8',
+  '8.8.4.4',
+  '1.0.0.1',
+  '1.1.1.1',
+  '162.159.36.1',
+  '162.159.46.1',
+  '101.101.101.101', // TWNIC
+  '185.222.222.222', // DNS.SB
+  '45.11.45.11', // DNS.SB
+  'dns10.quad9.net', // Quad9 unfiltered
+  'doh.sandbox.opendns.com', // OpenDNS sandbox (unfiltered)
+  'unfiltered.adguard-dns.com',
+  // '0ms.dev', // Proxy Cloudflare
+  // '76.76.2.0', // ControlD unfiltered, path not /dns-query
+  // '76.76.10.0', // ControlD unfiltered, path not /dns-query
+  // 'dns.bebasid.com', // BebasID, path not /dns-query but /unfiltered
+  // '193.110.81.0', // dns0.eu
+  // '185.253.5.0', // dns0.eu
+  // 'zero.dns0.eu',
+  'dns.nextdns.io',
+  'anycast.dns.nextdns.io',
+  'wikimedia-dns.org',
+  // 'ordns.he.net',
+  // 'dns.mullvad.net',
+  'basic.rethinkdns.com'
+  // 'ada.openbld.net',
+  // 'dns.rabbitdns.org'
+] as const).map(host => {
+  const client = DNS2.DOHClient({
+    dns: host,
+    http: false
+    // get: (url: string) => undici.request(url).then(r => r.body)
+  });
+  return [host, client] as const;
+});
+
+// Second DoH pool, consulted by isDomainAlive only after the general pool found nothing.
+// NOTE(review): presumably mainland-China public resolvers — confirm.
+const domesticDohServers: Array<[string, DNS2.DnsResolver]> = ([
+  '223.5.5.5',
+  '223.6.6.6',
+  '120.53.53.53',
+  '1.12.12.12'
+] as const).map(host => {
+  const client = DNS2.DOHClient({
+    dns: host,
+    http: false
+    // get: (url: string) => undici.request(url).then(r => r.body)
+  });
+  return [host, client] as const;
+});
+
+/**
+ * Builds a resolver that spreads queries over the given DoH pool.
+ *
+ * Each attempt picks a random `[server, client]` pair, and the successful response is
+ * returned with the chosen server name attached as `dns`. A failed attempt is rethrown
+ * as a `DnsError` naming the server and retried (`retries: 5`). Once retries are
+ * exhausted the error is logged and rethrown to the caller.
+ *
+ * @param server pool of `[server name, DoH client]` pairs to choose from
+ * @returns a resolver with the dns2 resolver signature that yields `DnsResponse`
+ */
+function createResolve(server: Array<[string, DNS2.DnsResolver]>): DNS2.DnsResolver<DnsResponse> {
+  return async (...args) => {
+    try {
+      return await asyncRetry(async () => {
+        // Chosen inside the retried callback so a retry can land on a different server.
+        const [dohServer, dohClient] = server[Math.floor(Math.random() * server.length)];
+
+        try {
+          return {
+            ...await dohClient(...args),
+            dns: dohServer
+          } satisfies DnsResponse;
+        } catch (e) {
+          // A rejection is not guaranteed to be an Error instance; keep a usable
+          // message instead of `undefined` when something else was thrown.
+          const message = e instanceof Error ? e.message : String(e);
+          throw new DnsError(message, dohServer);
+        }
+      }, { retries: 5 });
+    } catch (e) {
+      console.log('[doh error]', ...args, e);
+      throw e;
+    }
+  };
+}
+
+// Resolver backed by the general-purpose DoH pool.
+const resolve = createResolve(dohServers);
+// Resolver backed by the domestic DoH pool; isDomainAlive uses it for its final check.
+const domesticResolve = createResolve(domesticDohServers);
+
+/** Looks up the whois record of `domain` through whoiser, retrying a failed lookup (`retries: 5`). */
+async function getWhois(domain: string) {
+  const lookup = () => whoiser.domain(domain);
+  return asyncRetry(lookup, { retries: 5 });
+}
+
+// Verdict cache shared by isDomainAlive and isApexDomainAlive: domain -> alive?
+const domainAliveMap = new Map<string, boolean>();
+/** Caches `domain` as alive and returns the `[domain, true]` result tuple. */
+function onDomainAlive(domain: string): [string, boolean] {
+  const alive = true;
+  domainAliveMap.set(domain, alive);
+  return [domain, alive];
+}
+/** Caches `domain` as dead and returns the `[domain, false]` result tuple. */
+function onDomainDead(domain: string): [string, boolean] {
+  const alive = false;
+  domainAliveMap.set(domain, alive);
+  return [domain, alive];
+}
+
+/**
+ * Decides whether `domain` still looks alive.
+ *
+ * 1. A cached verdict for `domain` is returned immediately.
+ * 2. A name with no apex domain is logged as invalid and treated as alive.
+ * 3. The apex domain is checked once per apex (shared through keyedAsyncMutexWithQueue).
+ *    For suffix entries, or when the apex is dead, that apex verdict is returned as-is,
+ *    so the tuple then carries the apex domain rather than `domain`.
+ * 4. Otherwise the host must have an A or AAAA record: two lookups per record type
+ *    through the general DoH pool, then one per type through the domestic pool.
+ *
+ * Rejects if a DoH lookup still fails after its retries.
+ *
+ * @param domain   host name to check; a leading dot is stripped before DNS lookups
+ * @param isSuffix when true only the apex domain is checked
+ * @returns `[name, alive]`
+ */
+export async function isDomainAlive(domain: string, isSuffix: boolean): Promise<[string, boolean]> {
+  if (domainAliveMap.has(domain)) {
+    return [domain, domainAliveMap.get(domain)!];
+  }
+
+  const apexDomain = tldts.getDomain(domain, looseTldtsOpt);
+  if (!apexDomain) {
+    console.log(picocolors.gray('[domain invalid]'), picocolors.gray('no apex domain'), { domain });
+    return onDomainAlive(domain);
+  }
+
+  const apexDomainAlive = await keyedAsyncMutexWithQueue(apexDomain, () => isApexDomainAlive(apexDomain));
+  if (isSuffix) {
+    return apexDomainAlive;
+  }
+  if (!apexDomainAlive[1]) {
+    return apexDomainAlive;
+  }
+
+  const $domain = domain[0] === '.' ? domain.slice(1) : domain;
+
+  // Servers that returned an empty answer, kept only for the "dead" log line below.
+  const aDns: string[] = [];
+  const aaaaDns: string[] = [];
+
+  // test 2 times before make sure record is empty
+  for (let i = 0; i < 2; i++) {
+    // eslint-disable-next-line no-await-in-loop -- sequential
+    const aRecords = (await resolve($domain, 'A'));
+    if (aRecords.answers.length > 0) {
+      return onDomainAlive(domain);
+    }
+
+    aDns.push(aRecords.dns);
+  }
+  for (let i = 0; i < 2; i++) {
+    // eslint-disable-next-line no-await-in-loop -- sequential
+    const aaaaRecords = (await resolve($domain, 'AAAA'));
+    if (aaaaRecords.answers.length > 0) {
+      return onDomainAlive(domain);
+    }
+
+    aaaaDns.push(aaaaRecords.dns);
+  }
+
+  // only then, let's test once with domesticDohServers
+  const aRecords = (await domesticResolve($domain, 'A'));
+  if (aRecords.answers.length > 0) {
+    return onDomainAlive(domain);
+  }
+  aDns.push(aRecords.dns);
+
+  const aaaaRecords = (await domesticResolve($domain, 'AAAA'));
+  if (aaaaRecords.answers.length > 0) {
+    return onDomainAlive(domain);
+  }
+  aaaaDns.push(aaaaRecords.dns);
+
+  console.log(picocolors.red('[domain dead]'), 'no A/AAAA records', { domain, a: aDns, aaaa: aaaaDns });
+  // Record the verdict under the caller's original name: the cache lookup at the top
+  // and every alive path use `domain`, so the dot-stripped `$domain` would never be hit.
+  return onDomainDead(domain);
+}
+
+// NS lookup promise per apex domain; entries are never removed once stored.
+const apexDomainNsResolvePromiseMap = new Map<string, Promise<DnsResponse>>();
+
+/**
+ * Decides whether an apex domain is alive.
+ *
+ * A cached verdict wins. Otherwise any NS answer means alive. With no NS answer the
+ * whois record decides: a failed whois lookup or a "not found" record means dead.
+ *
+ * @param apexDomain registrable domain to check
+ * @returns `[apexDomain, alive]`; the verdict is also stored in domainAliveMap
+ */
+async function isApexDomainAlive(apexDomain: string): Promise<[string, boolean]> {
+  const known = domainAliveMap.get(apexDomain);
+  if (known !== undefined) {
+    return [apexDomain, known];
+  }
+
+  // Reuse an NS lookup that was already started for this apex domain.
+  let nsLookup = apexDomainNsResolvePromiseMap.get(apexDomain);
+  if (nsLookup === undefined) {
+    nsLookup = resolve(apexDomain, 'NS');
+    apexDomainNsResolvePromiseMap.set(apexDomain, nsLookup);
+  }
+  const nsResponse = await nsLookup;
+
+  if (nsResponse.answers.length > 0) {
+    return onDomainAlive(apexDomain);
+  }
+
+  let whois;
+
+  try {
+    whois = await getWhois(apexDomain);
+  } catch (e) {
+    console.log(picocolors.red('[domain dead]'), 'whois error', { domain: apexDomain }, e);
+    return onDomainDead(apexDomain);
+  }
+
+  // console.log(JSON.stringify(whois, null, 2));
+
+  const registered = whoisExists(whois);
+  if (!registered) {
+    console.log(picocolors.red('[domain dead]'), 'whois not found', { domain: apexDomain });
+    return onDomainDead(apexDomain);
+  }
+
+  console.log(picocolors.gray('[domain alive]'), 'whois found', { domain: apexDomain });
+  return onDomainAlive(apexDomain);
+}
+
+// TODO: this is a workaround for https://github.com/LayeredStudio/whoiser/issues/117
+// Phrases that whois servers print when a domain is not registered. whoisExists
+// lower-cases each `text` line before testing it, so keep every entry lower-case.
+// NOTE(review): presumably the filter matches when a line contains any phrase — confirm
+// against foxts/retrie.
+const whoisNotFoundKeywordTest = createKeywordFilter([
+  'no match for',
+  'does not exist',
+  'not found',
+  'no entries',
+  'no data found',
+  'is available for registration',
+  'currently available for application'
+]);
+
+/**
+ * Heuristically decides whether a whoiser result describes a registered domain.
+ *
+ * Own keys are visited in order and the first decisive one wins:
+ * - `error` with a non-empty string or array: the result is logged and treated as existing;
+ * - `text` containing a "not found" phrase: not registered;
+ * - `Name Server` that is an empty array: not registered;
+ * - any other non-array object value is checked recursively (whoiser nests one record
+ *   per whois server), and a nested "not registered" makes the whole result not registered.
+ *
+ * @param whois result of `whoiser.domain()` or one of its nested records
+ * @returns `false` when the record is empty or looks unregistered, `true` otherwise
+ */
+export function whoisExists(whois: whoiser.WhoisSearchResult) {
+  let sawOwnKey = false;
+
+  // for...in (not Object.keys) so a null nested value yields "empty" instead of throwing.
+  for (const key in whois) {
+    if (!Object.hasOwn(whois, key)) {
+      continue;
+    }
+    sawOwnKey = true;
+
+    switch (key) {
+      case 'error': {
+        const error = whois.error;
+        const hasMessage = typeof error === 'string' && error.length > 0;
+        const hasMessages = Array.isArray(error) && error.length > 0;
+        if (hasMessage || hasMessages) {
+          console.error(whois);
+          return true;
+        }
+        break;
+      }
+      case 'text': {
+        const lines = whois.text;
+        if (Array.isArray(lines) && lines.some(line => whoisNotFoundKeywordTest(line.toLowerCase()))) {
+          return false;
+        }
+        break;
+      }
+      case 'Name Server': {
+        const nameServers = whois[key];
+        if (Array.isArray(nameServers) && nameServers.length === 0) {
+          return false;
+        }
+        break;
+      }
+      default: {
+        const nested = whois[key];
+        if (typeof nested === 'object' && !Array.isArray(nested) && !whoisExists(nested)) {
+          return false;
+        }
+      }
+    }
+  }
+  return sawOwnKey;
+}

+ 5 - 227
Build/validate-domain-alive.ts

@@ -1,119 +1,13 @@
-import DNS2 from 'dns2';
 import { readFileByLine } from './lib/fetch-text-by-line';
 import { processLine } from './lib/process-line';
-import tldts from 'tldts-experimental';
-import { looseTldtsOpt } from './constants/loose-tldts-opt';
-import { fdir as Fdir } from 'fdir';
+
 import { SOURCE_DIR } from './constants/dir';
 import path from 'node:path';
 import { newQueue } from '@henrygd/queue';
-import asyncRetry from 'async-retry';
-import * as whoiser from 'whoiser';
-import picocolors from 'picocolors';
-import { createRetrieKeywordFilter as createKeywordFilter } from 'foxts/retrie';
-
-const dohServers: Array<[string, DNS2.DnsResolver]> = ([
-  '8.8.8.8',
-  '8.8.4.4',
-  '1.0.0.1',
-  '1.1.1.1',
-  '162.159.36.1',
-  '162.159.46.1',
-  '101.101.101.101', // TWNIC
-  '185.222.222.222', // DNS.SB
-  '45.11.45.11', // DNS.SB
-  'dns10.quad9.net', // Quad9 unfiltered
-  'doh.sandbox.opendns.com', // OpenDNS sandbox (unfiltered)
-  'unfiltered.adguard-dns.com',
-  // '0ms.dev', // Proxy Cloudflare
-  // '76.76.2.0', // ControlD unfiltered, path not /dns-query
-  // '76.76.10.0', // ControlD unfiltered, path not /dns-query
-  // 'dns.bebasid.com', // BebasID, path not /dns-query but /unfiltered
-  // '193.110.81.0', // dns0.eu
-  // '185.253.5.0', // dns0.eu
-  // 'zero.dns0.eu',
-  'dns.nextdns.io',
-  'anycast.dns.nextdns.io',
-  'wikimedia-dns.org',
-  // 'ordns.he.net',
-  // 'dns.mullvad.net',
-  'basic.rethinkdns.com'
-  // 'ada.openbld.net',
-  // 'dns.rabbitdns.org'
-] as const).map(dns => [
-  dns,
-  DNS2.DOHClient({
-    dns,
-    http: false
-    // get: (url: string) => undici.request(url).then(r => r.body)
-  })
-] as const);
-
-const domesticDohServers: Array<[string, DNS2.DnsResolver]> = ([
-  '223.5.5.5',
-  '223.6.6.6',
-  '120.53.53.53',
-  '1.12.12.12'
-] as const).map(dns => [
-  dns,
-  DNS2.DOHClient({
-    dns,
-    http: false
-    // get: (url: string) => undici.request(url).then(r => r.body)
-  })
-] as const);
+import { isDomainAlive, keyedAsyncMutexWithQueue } from './lib/is-domain-alive';
+import { fdir as Fdir } from 'fdir';
 
 const queue = newQueue(32);
-const mutex = new Map<string, Promise<unknown>>();
-function keyedAsyncMutexWithQueue<T>(key: string, fn: () => Promise<T>) {
-  if (mutex.has(key)) {
-    return mutex.get(key) as Promise<T>;
-  }
-  const promise = queue.add(() => fn());
-  mutex.set(key, promise);
-  return promise;
-}
-
-class DnsError extends Error {
-  name = 'DnsError';
-  constructor(readonly message: string, public readonly server: string) {
-    super(message);
-  }
-}
-
-interface DnsResponse extends DNS2.$DnsResponse {
-  dns: string
-}
-
-function createResolve(server: Array<[string, DNS2.DnsResolver]>): DNS2.DnsResolver<DnsResponse> {
-  return async (...args) => {
-    try {
-      return await asyncRetry(async () => {
-        const [dohServer, dohClient] = server[Math.floor(Math.random() * server.length)];
-
-        try {
-          return {
-            ...await dohClient(...args),
-            dns: dohServer
-          } satisfies DnsResponse;
-        } catch (e) {
-          // console.error(e);
-          throw new DnsError((e as Error).message, dohServer);
-        }
-      }, { retries: 5 });
-    } catch (e) {
-      console.log('[doh error]', ...args, e);
-      throw e;
-    }
-  };
-}
-
-const resolve = createResolve(dohServers);
-const domesticResolve = createResolve(domesticDohServers);
-
-async function getWhois(domain: string) {
-  return asyncRetry(() => whoiser.domain(domain), { retries: 5 });
-}
 
 (async () => {
   const domainSets = await new Fdir()
@@ -133,122 +27,6 @@ async function getWhois(domain: string) {
   console.log('done');
 })();
 
-const whoisNotFoundKeywordTest = createKeywordFilter([
-  'no match for',
-  'does not exist',
-  'not found'
-]);
-
-const domainAliveMap = new Map<string, boolean>();
-async function isApexDomainAlive(apexDomain: string): Promise<[string, boolean]> {
-  if (domainAliveMap.has(apexDomain)) {
-    return [apexDomain, domainAliveMap.get(apexDomain)!];
-  }
-
-  const resp = await resolve(apexDomain, 'NS');
-
-  if (resp.answers.length > 0) {
-    return [apexDomain, true];
-  }
-
-  let whois;
-
-  try {
-    whois = await getWhois(apexDomain);
-  } catch (e) {
-    console.log('[whois fail]', 'whois error', { domain: apexDomain }, e);
-    return [apexDomain, true];
-  }
-
-  if (Object.keys(whois).length > 0) {
-    // TODO: this is a workaround for https://github.com/LayeredStudio/whoiser/issues/117
-    if ('text' in whois && Array.isArray(whois.text) && whois.text.some(value => whoisNotFoundKeywordTest(value.toLowerCase()))) {
-      console.log(picocolors.red('[domain dead]'), 'whois not found', { domain: apexDomain });
-      domainAliveMap.set(apexDomain, false);
-      return [apexDomain, false];
-    }
-
-    return [apexDomain, true];
-  }
-
-  if (!('dns' in whois)) {
-    console.log({ whois });
-  }
-
-  console.log(picocolors.red('[domain dead]'), 'whois not found', { domain: apexDomain });
-  domainAliveMap.set(apexDomain, false);
-  return [apexDomain, false];
-}
-
-export async function isDomainAlive(domain: string, isSuffix: boolean): Promise<[string, boolean]> {
-  if (domainAliveMap.has(domain)) {
-    return [domain, domainAliveMap.get(domain)!];
-  }
-
-  const apexDomain = tldts.getDomain(domain, looseTldtsOpt);
-  if (!apexDomain) {
-    console.log('[domain invalid]', 'no apex domain', { domain });
-    domainAliveMap.set(domain, true);
-    return [domain, true] as const;
-  }
-
-  const apexDomainAlive = await isApexDomainAlive(apexDomain);
-
-  if (!apexDomainAlive[1]) {
-    domainAliveMap.set(domain, false);
-    return [domain, false] as const;
-  }
-
-  const $domain = domain[0] === '.' ? domain.slice(1) : domain;
-
-  if (!isSuffix) {
-    const aDns: string[] = [];
-    const aaaaDns: string[] = [];
-
-    // test 2 times before make sure record is empty
-    for (let i = 0; i < 2; i++) {
-      // eslint-disable-next-line no-await-in-loop -- sequential
-      const aRecords = (await resolve($domain, 'A'));
-      if (aRecords.answers.length !== 0) {
-        domainAliveMap.set(domain, true);
-        return [domain, true] as const;
-      }
-
-      aDns.push(aRecords.dns);
-    }
-    for (let i = 0; i < 2; i++) {
-      // eslint-disable-next-line no-await-in-loop -- sequential
-      const aaaaRecords = (await resolve($domain, 'AAAA'));
-      if (aaaaRecords.answers.length !== 0) {
-        domainAliveMap.set(domain, true);
-        return [domain, true] as const;
-      }
-
-      aaaaDns.push(aaaaRecords.dns);
-    }
-
-    // only then, let's test once with domesticDohServers
-    const aRecords = (await domesticResolve($domain, 'A'));
-    if (aRecords.answers.length !== 0) {
-      domainAliveMap.set(domain, true);
-      return [domain, true] as const;
-    }
-    aDns.push(aRecords.dns);
-
-    const aaaaRecords = (await domesticResolve($domain, 'AAAA'));
-    if (aaaaRecords.answers.length !== 0) {
-      domainAliveMap.set(domain, true);
-      return [domain, true] as const;
-    }
-    aaaaDns.push(aaaaRecords.dns);
-
-    console.log(picocolors.red('[domain dead]'), 'no A/AAAA records', { domain, a: aDns, aaaa: aaaaDns });
-  }
-
-  domainAliveMap.set($domain, false);
-  return [domain, false] as const;
-}
-
 export async function runAgainstRuleset(filepath: string) {
   const extname = path.extname(filepath);
   if (extname !== '.conf') {
@@ -265,7 +43,7 @@ export async function runAgainstRuleset(filepath: string) {
     switch (type) {
       case 'DOMAIN-SUFFIX':
       case 'DOMAIN': {
-        promises.push(keyedAsyncMutexWithQueue(domain, () => isDomainAlive(domain, type === 'DOMAIN-SUFFIX')));
+        promises.push(queue.add(() => keyedAsyncMutexWithQueue(domain, () => isDomainAlive(domain, type === 'DOMAIN-SUFFIX'))));
         break;
       }
       // no default
@@ -288,7 +66,7 @@ export async function runAgainstDomainset(filepath: string) {
   for await (const l of readFileByLine(filepath)) {
     const line = processLine(l);
     if (!line) continue;
-    promises.push(keyedAsyncMutexWithQueue(line, () => isDomainAlive(line, line[0] === '.')));
+    promises.push(queue.add(() => keyedAsyncMutexWithQueue(line, () => isDomainAlive(line, line[0] === '.'))));
   }
 
   await Promise.all(promises);