validate-domain-alive.ts 6.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236
  1. import DNS2 from 'dns2';
  2. import { readFileByLine } from './lib/fetch-text-by-line';
  3. import { processLine } from './lib/process-line';
  4. import tldts from 'tldts';
  5. import { looseTldtsOpt } from './constants/loose-tldts-opt';
  6. import { fdir as Fdir } from 'fdir';
  7. import { SOURCE_DIR } from './constants/dir';
  8. import path from 'node:path';
  9. import { newQueue } from '@henrygd/queue';
  10. import asyncRetry from 'async-retry';
  11. import * as whoiser from 'whoiser';
  12. import picocolors from 'picocolors';
  13. import createKeywordFilter from './lib/aho-corasick';
  14. const dohServers: Array<[string, DNS2.DnsResolver]> = ([
  15. '8.8.8.8',
  16. '8.8.4.4',
  17. '1.0.0.1',
  18. '1.1.1.1',
  19. '162.159.36.1',
  20. '162.159.46.1',
  21. '101.101.101.101', // TWNIC
  22. '185.222.222.222', // DNS.SB
  23. '45.11.45.11', // DNS.SB
  24. '9.9.9.10', // Quad9 unfiltered
  25. '149.112.112.10', // Quad9 unfiltered
  26. '208.67.222.2', // OpenDNS sandbox (unfiltered)
  27. '208.67.220.2', // OpenDNS sandbox (unfiltered)
  28. '94.140.14.140', // AdGuard unfiltered
  29. '94.140.14.141', // AdGuard unfiltered
  30. // '76.76.2.0', // ControlD unfiltered, path not /dns-query
  31. // '76.76.10.0', // ControlD unfiltered, path not /dns-query
  32. '193.110.81.0', // dns0.eu
  33. '185.253.5.0', // dns0.eu
  34. 'dns.nextdns.io',
  35. 'wikimedia-dns.org',
  36. // 'ordns.he.net',
  37. 'dns.mullvad.net'
  38. // 'ada.openbld.net',
  39. // 'dns.rabbitdns.org'
  40. ] as const).map(server => [
  41. server,
  42. DNS2.DOHClient({
  43. dns: server,
  44. http: false
  45. })
  46. ] as const);
  47. const queue = newQueue(8);
  48. class DnsError extends Error {
  49. name = 'DnsError';
  50. constructor(readonly message: string, public readonly server: string) {
  51. super(message);
  52. }
  53. }
  54. interface DnsResponse extends DNS2.$DnsResponse {
  55. dns: string
  56. }
  57. const resolve: DNS2.DnsResolver<DnsResponse> = async (...args) => {
  58. try {
  59. return await asyncRetry(async () => {
  60. const [dohServer, dohClient] = dohServers[Math.floor(Math.random() * dohServers.length)];
  61. try {
  62. const resp = await dohClient(...args);
  63. return {
  64. ...resp,
  65. dns: dohServer
  66. } satisfies DnsResponse;
  67. } catch (e) {
  68. throw new DnsError((e as Error).message, dohServer);
  69. }
  70. }, { retries: 5 });
  71. } catch (e) {
  72. console.log('[doh error]', ...args, e);
  73. throw e;
  74. }
  75. };
  76. (async () => {
  77. const domainSets = await new Fdir()
  78. .withFullPaths()
  79. .crawl(SOURCE_DIR + path.sep + 'domainset')
  80. .withPromise();
  81. const domainRules = await new Fdir()
  82. .withFullPaths()
  83. .crawl(SOURCE_DIR + path.sep + 'non_ip')
  84. .withPromise();
  85. await Promise.all([
  86. ...domainSets.map(runAgainstDomainset),
  87. ...domainRules.map(runAgainstRuleset)
  88. ]);
  89. console.log('done');
  90. })();
  91. const whoisNotFoundKeywordTest = createKeywordFilter([
  92. 'no match for',
  93. 'does not exist',
  94. 'not found'
  95. ]);
  96. const domainAliveMap = new Map<string, boolean>();
  97. async function isApexDomainAlive(apexDomain: string): Promise<[string, boolean]> {
  98. if (domainAliveMap.has(apexDomain)) {
  99. return [apexDomain, domainAliveMap.get(apexDomain)!];
  100. }
  101. const resp = await resolve(apexDomain, 'NS');
  102. if (resp.answers.length > 0) {
  103. return [apexDomain, true];
  104. }
  105. let whois;
  106. try {
  107. whois = await whoiser.domain(apexDomain);
  108. } catch (e) {
  109. console.log('[whois fail]', 'whois error', { domain: apexDomain }, e);
  110. return [apexDomain, true];
  111. }
  112. if (Object.keys(whois).length > 0) {
  113. // TODO: this is a workaround for https://github.com/LayeredStudio/whoiser/issues/117
  114. if ('text' in whois && Array.isArray(whois.text) && whois.text.some(value => whoisNotFoundKeywordTest(value.toLowerCase()))) {
  115. console.log(picocolors.red('[domain dead]'), 'whois no match', { domain: apexDomain });
  116. domainAliveMap.set(apexDomain, false);
  117. return [apexDomain, false];
  118. }
  119. return [apexDomain, true];
  120. }
  121. if (!('dns' in whois)) {
  122. console.log({ whois });
  123. }
  124. console.log(picocolors.red('[domain dead]'), 'whois no match', { domain: apexDomain });
  125. domainAliveMap.set(apexDomain, false);
  126. return [apexDomain, false];
  127. }
  128. const domainMutex = new Map<string, Promise<[string, boolean]>>();
  129. export async function isDomainAlive(domain: string, isSuffix: boolean): Promise<[string, boolean]> {
  130. if (domain[0] === '.') {
  131. domain = domain.slice(1);
  132. }
  133. const apexDomain = tldts.getDomain(domain, looseTldtsOpt);
  134. if (!apexDomain) {
  135. console.log('[domain invalid]', 'no apex domain', { domain });
  136. return [domain, true] as const;
  137. }
  138. let apexDomainAlivePromise;
  139. if (domainMutex.has(domain)) {
  140. apexDomainAlivePromise = domainMutex.get(domain)!;
  141. } else {
  142. apexDomainAlivePromise = queue.add(() => isApexDomainAlive(apexDomain).then(res => {
  143. domainMutex.delete(domain);
  144. return res;
  145. }));
  146. domainMutex.set(domain, apexDomainAlivePromise);
  147. }
  148. const apexDomainAlive = await apexDomainAlivePromise;
  149. if (!apexDomainAlive[1]) {
  150. domainAliveMap.set(domain, false);
  151. return [domain, false] as const;
  152. }
  153. if (!isSuffix) {
  154. const aRecords = (await resolve(domain, 'A'));
  155. if (aRecords.answers.length === 0) {
  156. const aaaaRecords = (await resolve(domain, 'AAAA'));
  157. if (aaaaRecords.answers.length === 0) {
  158. console.log(picocolors.red('[domain dead]'), 'no A/AAAA records', { domain, a: aRecords.dns, aaaa: aaaaRecords.dns });
  159. domainAliveMap.set(domain, false);
  160. return [domain, false] as const;
  161. }
  162. }
  163. }
  164. domainAliveMap.set(domain, true);
  165. return [domain, true] as const;
  166. }
  167. export async function runAgainstRuleset(filepath: string) {
  168. const promises: Array<Promise<[string, boolean]>> = [];
  169. for await (const l of readFileByLine(filepath)) {
  170. const line = processLine(l);
  171. if (!line) continue;
  172. const [type, domain] = line.split(',');
  173. switch (type) {
  174. case 'DOMAIN-SUFFIX':
  175. case 'DOMAIN': {
  176. if (!domainMutex.has(domain)) {
  177. const promise = queue.add(() => isDomainAlive(domain, type === 'DOMAIN-SUFFIX')).then(res => {
  178. domainMutex.delete(domain);
  179. return res;
  180. });
  181. domainMutex.set(domain, promise);
  182. promises.push(promise);
  183. }
  184. break;
  185. }
  186. // no default
  187. // case 'DOMAIN-KEYWORD': {
  188. // break;
  189. // }
  190. // no default
  191. }
  192. }
  193. return Promise.all(promises);
  194. }
  195. export async function runAgainstDomainset(filepath: string) {
  196. const promises: Array<Promise<[string, boolean]>> = [];
  197. for await (const l of readFileByLine(filepath)) {
  198. const line = processLine(l);
  199. if (!line) continue;
  200. promises.push(isDomainAlive(line, line[0] === '.'));
  201. }
  202. await Promise.all(promises);
  203. console.log('[done]', filepath);
  204. }