ソースを参照

Chore: update domain alive check

SukkaW 10 ヶ月 前
コミット
803e503a1e

+ 4 - 35
Build/lib/is-domain-alive.test.ts

@@ -3,45 +3,14 @@ import { describe, it } from 'mocha';
 import { isDomainAlive } from './is-domain-alive';
 import { expect } from 'expect';
 
-import process from 'node:process';
-
 describe('isDomainAlive', function () {
   this.timeout(10000);
 
-  // it('.cryptocrawler.io', async () => {
-  //   expect((await isDomainAlive('.cryptocrawler.io', true))[1]).toEqual(false);
-  // });
-
-  // it('.tunevideo.ru', async () => {
-  //   expect((await isDomainAlive('.tunevideo.ru', true))[1]).toEqual(false);
-  // });
-
-  // it('.myqloud.com', async () => {
-  //   expect((await isDomainAlive('.myqloud.com', true))[1]).toEqual(true);
-  // });
-
-  // it('discount-deal.org', async () => {
-  //   expect((await isDomainAlive('discount-deal.org', false))[1]).toEqual(false);
-  // });
-
-  // it('ithome.com.tw', async () => {
-  //   expect((await isDomainAlive('ithome.com.tw', false))[1]).toEqual(true);
-  // });
-
-  // it('flipkart.com', async () => {
-  //   expect((await isDomainAlive('flipkart.com', false))[1]).toEqual(true);
-  // });
-
-  // it('lzzyimg.com', async () => {
-  //   expect((await isDomainAlive('.lzzyimg.com', true))[1]).toEqual(true);
-  // });
-
-  // it('tayfundogdas.me', async () => {
-  //   expect((await isDomainAlive('.tayfundogdas.me', true))[1]).toEqual(true);
-  // });
+  it('samsungcloudsolution.net', async () => {
+    expect((await isDomainAlive('samsungcloudsolution.net', true))).toEqual(false);
+  });
 
   it('ecdasoin.it', async () => {
-    process.env.DEBUG = 'true';
-    expect((await isDomainAlive('.ecdasoin.it', true))[1]).toEqual(false);
+    expect((await isDomainAlive('.ecdasoin.it', true))).toEqual(false);
   });
 });

+ 116 - 140
Build/lib/is-domain-alive.ts

@@ -1,25 +1,15 @@
-import tldts from 'tldts-experimental';
-import { looseTldtsOpt } from '../constants/loose-tldts-opt';
-import picocolors from 'picocolors';
-import { pickRandom, pickOne } from 'foxts/pick-random';
-
 import DNS2 from 'dns2';
 import asyncRetry from 'async-retry';
+import picocolors from 'picocolors';
+import { looseTldtsOpt } from '../constants/loose-tldts-opt';
+import { createKeyedAsyncMutex } from './keyed-async-mutex';
+import { pickRandom, pickOne } from 'foxts/pick-random';
+import tldts from 'tldts-experimental';
 import * as whoiser from 'whoiser';
-
-import { createRetrieKeywordFilter as createKeywordFilter } from 'foxts/retrie';
-
 import process from 'node:process';
+import { createRetrieKeywordFilter as createKeywordFilter } from 'foxts/retrie';
 
-const mutex = new Map<string, Promise<unknown>>();
-export function keyedAsyncMutexWithQueue<T>(key: string, fn: () => Promise<T>) {
-  if (mutex.has(key)) {
-    return mutex.get(key) as Promise<T>;
-  }
-  const promise = fn();
-  mutex.set(key, promise);
-  return promise;
-}
+const domainAliveMap = new Map<string, boolean>();
 
 class DnsError extends Error {
   name = 'DnsError';
@@ -88,163 +78,150 @@ const domesticDohServers: Array<[string, DNS2.DnsResolver]> = ([
   })
 ] as const);
 
-async function $resolve(name: string, type: DNS2.PacketQuestion, server: [string, DNS2.DnsResolver]) {
-  try {
-    return await asyncRetry(async () => {
-      const [dohServer, dohClient] = server;
+const domainAliveMutex = createKeyedAsyncMutex('isDomainAlive');
 
-      try {
-        return {
-          ...await dohClient(name, type),
-          dns: dohServer
-        } satisfies DnsResponse;
-      } catch (e) {
-        // console.error(e);
-        throw new DnsError((e as Error).message, dohServer);
-      }
-    }, { retries: 5 });
-  } catch (e) {
-    console.log('[doh error]', name, type, e);
-    throw e;
-  }
-}
-
-async function getWhois(domain: string) {
-  return asyncRetry(() => whoiser.domain(domain, { raw: true }), { retries: 5 });
-}
-
-const domainAliveMap = new Map<string, boolean>();
-function onDomainAlive(domain: string): [string, boolean] {
-  domainAliveMap.set(domain, true);
-  return [domain, true];
-}
-function onDomainDead(domain: string): [string, boolean] {
-  domainAliveMap.set(domain, false);
-  return [domain, false];
-}
-
-export async function isDomainAlive(domain: string, isSuffix: boolean): Promise<[string, boolean]> {
+export async function isDomainAlive(domain: string, isIncludeAllSubdomain: boolean = domain[0] === '.'): Promise<boolean> {
   if (domainAliveMap.has(domain)) {
-    return [domain, domainAliveMap.get(domain)!];
+    return domainAliveMap.get(domain)!;
   }
-
   const apexDomain = tldts.getDomain(domain, looseTldtsOpt);
   if (!apexDomain) {
     console.log(picocolors.gray('[domain invalid]'), picocolors.gray('no apex domain'), { domain });
-    return onDomainAlive(domain);
+    domainAliveMap.set('.' + domain, true);
+    return true;
   }
 
-  const apexDomainAlive = await keyedAsyncMutexWithQueue(apexDomain, () => isApexDomainAlive(apexDomain));
-  if (isSuffix) {
+  const apexDomainAlive = await isApexDomainAlive(apexDomain);
+  if (isIncludeAllSubdomain || domain.length > apexDomain.length) {
     return apexDomainAlive;
   }
-  if (!apexDomainAlive[1]) {
-    return apexDomainAlive;
+  if (!apexDomainAlive) {
+    return false;
   }
 
-  const $domain = domain[0] === '.' ? domain.slice(1) : domain;
+  return domainAliveMutex.acquire(domain, async () => {
+    domain = domain[0] === '.' ? domain.slice(1) : domain;
 
-  const aDns: string[] = [];
-  const aaaaDns: string[] = [];
+    const $domain = isIncludeAllSubdomain ? '.' + domain : domain;
 
-  // test 2 times before make sure record is empty
-  const servers = pickRandom(dohServers, 2);
-  for (let i = 0; i < 2; i++) {
-    // eslint-disable-next-line no-await-in-loop -- sequential
-    const aRecords = (await $resolve($domain, 'A', servers[i]));
-    if (aRecords.answers.length > 0) {
-      return onDomainAlive(domain);
-    }
+    const aDns: string[] = [];
+    const aaaaDns: string[] = [];
 
-    aDns.push(aRecords.dns);
-  }
-  for (let i = 0; i < 2; i++) {
+    // test 2 times before make sure record is empty
+    const servers = pickRandom(dohServers, 2);
+    for (let i = 0; i < 2; i++) {
     // eslint-disable-next-line no-await-in-loop -- sequential
-    const aaaaRecords = (await $resolve($domain, 'AAAA', servers[i]));
-    if (aaaaRecords.answers.length > 0) {
-      return onDomainAlive(domain);
-    }
-
-    aaaaDns.push(aaaaRecords.dns);
-  }
+      const aRecords = (await $resolve(domain, 'A', servers[i]));
+      if (aRecords.answers.length > 0) {
+        domainAliveMap.set($domain, true);
+        return true;
+      }
 
-  // only then, let's test twice with domesticDohServers
-  for (let i = 0; i < 2; i++) {
+      aDns.push(aRecords.dns);
+    }
+    for (let i = 0; i < 2; i++) {
     // eslint-disable-next-line no-await-in-loop -- sequential
-    const aRecords = (await $resolve($domain, 'A', pickOne(domesticDohServers)));
-    if (aRecords.answers.length > 0) {
-      return onDomainAlive(domain);
+      const aaaaRecords = (await $resolve(domain, 'AAAA', servers[i]));
+      if (aaaaRecords.answers.length > 0) {
+        domainAliveMap.set($domain, true);
+        return true;
+      }
+
+      aaaaDns.push(aaaaRecords.dns);
     }
-    aDns.push(aRecords.dns);
-  }
 
-  for (let i = 0; i < 2; i++) {
+    // only then, let's test twice with domesticDohServers
+    for (let i = 0; i < 2; i++) {
     // eslint-disable-next-line no-await-in-loop -- sequential
-    const aaaaRecords = (await $resolve($domain, 'AAAA', pickOne(domesticDohServers)));
-    if (aaaaRecords.answers.length > 0) {
-      return onDomainAlive(domain);
+      const aRecords = (await $resolve(domain, 'A', pickOne(domesticDohServers)));
+      if (aRecords.answers.length > 0) {
+        domainAliveMap.set($domain, true);
+        return true;
+      }
+      aDns.push(aRecords.dns);
+    }
+    for (let i = 0; i < 2; i++) {
+      // eslint-disable-next-line no-await-in-loop -- sequential
+      const aaaaRecords = (await $resolve(domain, 'AAAA', pickOne(domesticDohServers)));
+      if (aaaaRecords.answers.length > 0) {
+        domainAliveMap.set($domain, true);
+        return true;
+      }
+      aaaaDns.push(aaaaRecords.dns);
     }
-    aaaaDns.push(aaaaRecords.dns);
-  }
-
-  console.log(picocolors.red('[domain dead]'), 'no A/AAAA records', { domain, a: aDns, aaaa: aaaaDns });
-  return onDomainDead($domain);
-}
 
-const apexDomainNsResolvePromiseMap = new Map<string, Promise<boolean>>();
+    console.log(picocolors.red('[domain dead]'), 'no A/AAAA records', { domain, a: aDns, aaaa: aaaaDns });
 
-async function getNS(domain: string) {
-  const servers = pickRandom(dohServers, 2);
-  for (let i = 0, len = servers.length; i < len; i++) {
-    const server = servers[i];
-    // eslint-disable-next-line no-await-in-loop -- one by one
-    const resp = await $resolve(domain, 'NS', server);
-    if (resp.answers.length > 0) {
-      return true;
-    }
-  }
-  return false;
+    domainAliveMap.set($domain, false);
+    return false;
+  });
 }
 
-async function isApexDomainAlive(apexDomain: string): Promise<[string, boolean]> {
+const apexDomainMap = createKeyedAsyncMutex('isApexDomainAlive');
+
+function isApexDomainAlive(apexDomain: string) {
   if (domainAliveMap.has(apexDomain)) {
-    return [apexDomain, domainAliveMap.get(apexDomain)!];
+    return domainAliveMap.get(apexDomain)!;
   }
 
-  let hasNS: boolean;
-  if (apexDomainNsResolvePromiseMap.has(apexDomain)) {
-    hasNS = await apexDomainNsResolvePromiseMap.get(apexDomain)!;
-  } else {
-    const promise = getNS(apexDomain);
-    apexDomainNsResolvePromiseMap.set(apexDomain, promise);
-    hasNS = await promise;
-  }
+  return apexDomainMap.acquire(apexDomain, async () => {
+    const servers = pickRandom(dohServers, 2);
+    for (let i = 0, len = servers.length; i < len; i++) {
+      const server = servers[i];
+      // eslint-disable-next-line no-await-in-loop -- one by one
+      const resp = await $resolve(apexDomain, 'NS', server);
+      if (resp.answers.length > 0) {
+        domainAliveMap.set(apexDomain, true);
+        return true;
+      }
+    }
 
-  if (hasNS) {
-    return onDomainAlive(apexDomain);
-  }
+    let whois;
+    try {
+      whois = await getWhois(apexDomain);
+    } catch (e) {
+      console.log(picocolors.red('[whois error]'), { domain: apexDomain }, e);
+      domainAliveMap.set(apexDomain, true);
+      return true;
+    }
 
-  let whois;
+    const whoisError = noWhois(whois);
+    if (!whoisError) {
+      console.log(picocolors.gray('[domain alive]'), picocolors.gray('whois found'), { domain: apexDomain });
+      domainAliveMap.set(apexDomain, true);
+      return true;
+    }
 
-  try {
-    whois = await getWhois(apexDomain);
-  } catch (e) {
-    console.log(picocolors.red('[whois error]'), { domain: apexDomain }, e);
-    return onDomainAlive(apexDomain);
-  }
+    console.log(picocolors.red('[domain dead]'), 'whois not found', { domain: apexDomain, err: whoisError });
 
-  if (process.env.DEBUG) {
-    console.log(JSON.stringify(whois, null, 2));
-  }
+    domainAliveMap.set(apexDomain, false);
+    return false;
+  });
+}
 
-  const whoisError = noWhois(whois);
-  if (!whoisError) {
-    console.log(picocolors.gray('[domain alive]'), picocolors.gray('whois found'), { domain: apexDomain });
-    return onDomainAlive(apexDomain);
+async function $resolve(name: string, type: DNS2.PacketQuestion, server: [string, DNS2.DnsResolver]) {
+  try {
+    return await asyncRetry(async () => {
+      const [dohServer, dohClient] = server;
+
+      try {
+        return {
+          ...await dohClient(name, type),
+          dns: dohServer
+        } satisfies DnsResponse;
+      } catch (e) {
+        // console.error(e);
+        throw new DnsError((e as Error).message, dohServer);
+      }
+    }, { retries: 5 });
+  } catch (e) {
+    console.log('[doh error]', name, type, e);
+    throw e;
   }
+}
 
-  console.log(picocolors.red('[domain dead]'), 'whois not found', { domain: apexDomain, err: whoisError });
-  return onDomainDead('.' + apexDomain);
+async function getWhois(domain: string) {
+  return asyncRetry(() => whoiser.domain(domain, { raw: true }), { retries: 5 });
 }
 
 // TODO: this is a workaround for https://github.com/LayeredStudio/whoiser/issues/117
@@ -269,11 +246,10 @@ const whoisNotFoundKeywordTest = createKeywordFilter([
   // 'pendingdelete',
   ' has been blocked by '
 ]);
-
 // whois server can redirect, so whoiser might/will get info from multiple whois servers
 // some servers (like TLD whois servers) might have cached/outdated results
 // we can only make sure a domain is alive once all responses from all whois servers demonstrate so
-export function noWhois(whois: whoiser.WhoisSearchResult): null | string {
+function noWhois(whois: whoiser.WhoisSearchResult): null | string {
   let empty = true;
 
   for (const key in whois) {

+ 23 - 0
Build/lib/keyed-async-mutex.ts

@@ -0,0 +1,23 @@
+const globalMap = new Map<string, Map<string, Promise<unknown>>>(); // module-level registry: namespace key -> (key -> stored promise)
+// Returns an object whose acquire(key, fn) invokes fn at most once per key; calls made with the same namespace key share one underlying map.
+export function createKeyedAsyncMutex(globalNamespaceKey: string) {
+  let map;
+  if (globalMap.has(globalNamespaceKey)) {
+    map = globalMap.get(globalNamespaceKey)!; // reuse the map already registered for this namespace
+  } else {
+    map = new Map();
+    globalMap.set(globalNamespaceKey, map); // first use of this namespace: register a fresh map
+  }
+  // NOTE(review): nothing in this module deletes entries, so a stored promise (fulfilled or rejected) keeps being returned for its key.
+  return {
+    async acquire<T = unknown>(key: string, fn: () => Promise<T>) {
+      if (map.has(key)) {
+        return map.get(key); // a promise is already stored for this key: return it without calling fn
+      }
+      // No promise stored yet: call fn and store its promise synchronously, before any await, so later callers find it.
+      const promise = fn();
+      map.set(key, promise);
+      return promise;
+    }
+  };
+}

+ 9 - 15
Build/validate-domain-alive.ts

@@ -1,18 +1,10 @@
 import { SOURCE_DIR } from './constants/dir';
 import path from 'node:path';
-import { newQueue } from '@henrygd/queue';
-import { isDomainAlive, keyedAsyncMutexWithQueue } from './lib/is-domain-alive';
+import { isDomainAlive } from './lib/is-domain-alive';
 import { fdir as Fdir } from 'fdir';
 import runAgainstSourceFile from './lib/run-against-source-file';
 
-const queue = newQueue(24);
-
 const deadDomains: string[] = [];
-function onDomain(args: [string, boolean]) {
-  if (!args[1]) {
-    deadDomains.push(args[0]);
-  }
-}
 
 (async () => {
   const domainSets = await new Fdir()
@@ -42,12 +34,14 @@ function onDomain(args: [string, boolean]) {
   ].map(
     filepath => runAgainstSourceFile(
       filepath,
-      (domain: string, includeAllSubdomain: boolean) => promises.push(queue.add(
-        () => keyedAsyncMutexWithQueue(
-          domain,
-          () => isDomainAlive(domain, includeAllSubdomain)
-        ).then(onDomain)
-      ))
+      (domain: string, includeAllSubdomain: boolean) => promises.push(
+        isDomainAlive(domain, includeAllSubdomain).then((alive) => {
+          if (alive) {
+            return;
+          }
+          deadDomains.push(includeAllSubdomain ? '.' + domain : domain);
+        })
+      )
     ).then(() => console.log('[crawl]', filepath))
   ));