_get-lum-apex-domains.ts 1.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142
  1. import { fetchRemoteTextByLine, readFileByLine } from './lib/fetch-text-by-line';
  2. import tldts from 'tldts';
  3. import { HostnameSmolTrie } from './lib/trie';
  4. import path from 'node:path';
  5. import { SOURCE_DIR } from './constants/dir';
  6. import { processLine } from './lib/process-line';
  7. (async () => {
  8. const lines1 = await Array.fromAsync(await fetchRemoteTextByLine('https://raw.githubusercontent.com/durablenapkin/block/master/luminati.txt', true));
  9. const lines2 = await Array.fromAsync(await fetchRemoteTextByLine('https://raw.githubusercontent.com/durablenapkin/block/master/tvstream.txt', true));
  10. const trie = new HostnameSmolTrie();
  11. lines1.forEach((line) => {
  12. const apexDomain = tldts.getDomain(line.slice(8));
  13. if (apexDomain) {
  14. trie.add(apexDomain);
  15. }
  16. });
  17. lines2.forEach((line) => {
  18. const apexDomain = tldts.getDomain(line.slice(8));
  19. if (apexDomain) {
  20. trie.add(apexDomain);
  21. }
  22. });
  23. const dataFromDuckDuckGo = await fetch('https://raw.githubusercontent.com/duckduckgo/tracker-radar/92e086ce38a8a88c964ed0184e5277ec1d5c8038/entities/Bright%20Data%20Ltd..json').then((res) => res.json());
  24. if (typeof dataFromDuckDuckGo === 'object' && dataFromDuckDuckGo !== null && 'properties' in dataFromDuckDuckGo && Array.isArray(dataFromDuckDuckGo.properties)) {
  25. dataFromDuckDuckGo.properties.forEach((prop) => {
  26. trie.add(prop);
  27. });
  28. }
  29. for await (const line of readFileByLine(path.join(SOURCE_DIR, 'domainset', 'reject.conf'))) {
  30. const l = processLine(line);
  31. if (l) {
  32. trie.whitelist(l);
  33. }
  34. }
  35. console.log(trie.dump().map(i => '.' + i).join('\n'));
  36. })();