build-internal-cdn-rules.ts 3.3 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586
  1. import fsp from 'fs/promises';
  2. import path from 'path';
  3. import * as tldts from 'tldts';
  4. import { processLine } from './lib/process-line';
  5. import { readFileByLine } from './lib/fetch-text-by-line';
  6. import { createDomainSorter } from './lib/stable-sort-domain';
  7. import { task } from './lib/trace-runner';
  8. import { compareAndWriteFile } from './lib/create-file';
  9. import { getGorhillPublicSuffixPromise } from './lib/get-gorhill-publicsuffix';
  10. // const { createCachedGorhillGetDomain } = require('./lib/cached-tld-parse');
  11. const escapeRegExp = (string = '') => string.replaceAll(/[$()*+.?[\\\]^{|}]/g, '\\$&');
  12. export const buildInternalCDNDomains = task(import.meta.path, async () => {
  13. const set = new Set<string>();
  14. const keywords = new Set<string>();
  15. const addApexDomain = (input: string) => {
  16. // We are including the private domains themselves
  17. const d = tldts.getDomain(input, { allowPrivateDomains: false });
  18. if (d) {
  19. set.add(d);
  20. }
  21. };
  22. const processLocalDomainSet = async (domainSetPath: string) => {
  23. for await (const line of readFileByLine(domainSetPath)) {
  24. // console.log({ line });
  25. const parsed = tldts.parse(line, { allowPrivateDomains: true, detectIp: false });
  26. if (parsed.isIp) continue;
  27. if (parsed.isIcann || parsed.isPrivate) {
  28. if (parsed.domain) {
  29. set.add(parsed.domain);
  30. }
  31. continue;
  32. }
  33. if (processLine(line)) {
  34. console.warn('[drop line from domainset]', line);
  35. }
  36. }
  37. };
  38. const processLocalRuleSet = async (ruleSetPath: string) => {
  39. for await (const line of readFileByLine(ruleSetPath)) {
  40. if (line.startsWith('DOMAIN-SUFFIX,')) {
  41. addApexDomain(line.replace('DOMAIN-SUFFIX,', ''));
  42. } else if (line.startsWith('DOMAIN,')) {
  43. addApexDomain(line.replace('DOMAIN,', ''));
  44. } else if (line.startsWith('DOMAIN-KEYWORD')) {
  45. keywords.add(escapeRegExp(line.replace('DOMAIN-KEYWORD,', '')));
  46. } else if (line.startsWith('USER-AGENT,') || line.startsWith('PROCESS-NAME,') || line.startsWith('URL-REGEX,')) {
  47. // do nothing
  48. } else if (processLine(line)) {
  49. console.warn('[drop line from ruleset]', line);
  50. }
  51. }
  52. };
  53. const [domainSorter] = await Promise.all([
  54. getGorhillPublicSuffixPromise().then(createDomainSorter),
  55. processLocalRuleSet(path.resolve(import.meta.dir, '../List/non_ip/cdn.conf')),
  56. processLocalRuleSet(path.resolve(import.meta.dir, '../List/non_ip/global.conf')),
  57. processLocalRuleSet(path.resolve(import.meta.dir, '../List/non_ip/global_plus.conf')),
  58. processLocalRuleSet(path.resolve(import.meta.dir, '../List/non_ip/my_proxy.conf')),
  59. processLocalRuleSet(path.resolve(import.meta.dir, '../List/non_ip/stream.conf')),
  60. processLocalRuleSet(path.resolve(import.meta.dir, '../List/non_ip/telegram.conf')),
  61. processLocalDomainSet(path.resolve(import.meta.dir, '../List/domainset/cdn.conf')),
  62. processLocalDomainSet(path.resolve(import.meta.dir, '../List/domainset/download.conf')),
  63. fsp.mkdir(path.resolve(import.meta.dir, '../List/internal'), { recursive: true })
  64. ]);
  65. return compareAndWriteFile(
  66. [
  67. ...Array.from(set).sort(domainSorter).map(i => `SUFFIX,${i}`),
  68. ...Array.from(keywords).sort().map(i => `REGEX,${i}`)
  69. ],
  70. path.resolve(import.meta.dir, '../List/internal/cdn.txt')
  71. );
  72. });
  73. if (import.meta.main) {
  74. buildInternalCDNDomains();
  75. }