// build-cdn-download-conf.ts
  1. import path from 'node:path';
  2. import { readFileIntoProcessedArray } from './lib/fetch-text-by-line';
  3. import { task } from './trace';
  4. import { SHARED_DESCRIPTION } from './constants/description';
  5. import { appendArrayInPlace } from 'foxts/append-array-in-place';
  6. import { SOURCE_DIR } from './constants/dir';
  7. import { DomainsetOutput } from './lib/rules/domainset';
  8. import { CRASHLYTICS_WHITELIST } from './constants/reject-data-source';
  9. import Worktank from 'worktank';
  10. import { $$fetch } from './lib/fetch-retry';
  11. import { fastUri } from 'fast-uri';
  12. const cdnDomainSetPromise = readFileIntoProcessedArray(path.join(SOURCE_DIR, 'domainset/cdn.conf'));
  13. const downloadDomainSetPromise = readFileIntoProcessedArray(path.join(SOURCE_DIR, 'domainset/download.conf'));
  14. const steamDomainSetPromise = readFileIntoProcessedArray(path.join(SOURCE_DIR, 'domainset/game-download.conf'));
  15. const pool = new Worktank({
  16. pool: {
  17. name: 'extract-s3-from-publicssuffix',
  18. size: 1 // The number of workers to keep in the pool, if more workers are needed they will be spawned up to this limit
  19. },
  20. worker: {
  21. autoAbort: 10000,
  22. autoTerminate: 20000, // The interval of milliseconds at which to check if the pool can be automatically terminated, to free up resources, workers will be spawned up again if needed
  23. autoInstantiate: true,
  24. methods: {
  25. // eslint-disable-next-line object-shorthand -- workertank
  26. getS3OSSDomains: async function (__filename: string): Promise<string[]> {
  27. // TODO: createRequire is a temporary workaround for https://github.com/nodejs/node/issues/51956
  28. const { default: module } = await import('node:module');
  29. const __require = module.createRequire(__filename);
  30. const { HostnameTrie } = __require('./lib/trie') as typeof import('./lib/trie');
  31. const { fetchRemoteTextByLine } = __require('./lib/fetch-text-by-line') as typeof import('./lib/fetch-text-by-line');
  32. const trie = new HostnameTrie();
  33. for await (const line of await fetchRemoteTextByLine('https://publicsuffix.org/list/public_suffix_list.dat', true)) {
  34. trie.add(line);
  35. }
  36. /**
  37. * Extract OSS domain from publicsuffix list
  38. */
  39. const S3OSSDomains: string[] = [];
  40. trie.find('.amazonaws.com').forEach((line: string) => {
  41. if (
  42. (line.startsWith('s3-') || line.startsWith('s3.'))
  43. && !line.includes('cn-')
  44. ) {
  45. S3OSSDomains.push('.' + line);
  46. }
  47. });
  48. trie.find('.scw.cloud').forEach((line: string) => {
  49. if (
  50. (line.startsWith('s3-') || line.startsWith('s3.'))
  51. // && !line.includes('cn-')
  52. ) {
  53. S3OSSDomains.push('.' + line);
  54. }
  55. });
  56. trie.find('sakurastorage.jp').forEach((line: string) => {
  57. if (
  58. (line.startsWith('s3-') || line.startsWith('s3.'))
  59. ) {
  60. S3OSSDomains.push('.' + line);
  61. }
  62. });
  63. return S3OSSDomains;
  64. }
  65. }
  66. }
  67. });
/**
 * Builds the `cdn` and `download` domainset rulesets.
 *
 * Inputs (gathered in parallel): S3 OSS domains extracted from the Public
 * Suffix List (via the worker pool), public IPFS gateway hostnames, and the
 * three local source domainsets (cdn / download / game-download).
 */
export const buildCdnDownloadConf = task(require.main === module, __filename)(async (span) => {
  // Destructuring order must match the order of the promises in the
  // Promise.all array below — keep the two lists in sync.
  const [
    S3OSSDomains,
    IPFSDomains,
    cdnDomainsList,
    downloadDomainSet,
    steamDomainSet
  ] = await Promise.all([
    span.traceChildAsync(
      'download public suffix list for s3',
      // Run the extraction in the worker pool, then terminate the pool
      // whether it succeeded or failed so the process can exit cleanly.
      () => pool.exec(
        'getS3OSSDomains',
        [__filename]
      ).finally(() => pool.terminate())
    ),
    span.traceChildAsync(
      'load public ipfs gateway list',
      async () => {
        const data = await (await $$fetch('https://cdn.jsdelivr.net/gh/ipfs/public-gateway-checker@refs/heads/main/gateways.json')).json();
        // Best-effort: an unexpected payload shape degrades to an empty
        // gateway list instead of failing the whole build.
        if (!Array.isArray(data)) {
          console.error('Invalid IPFS gateway list format');
          return [];
        }
        // Entries are expected to be gateway URLs; keep only the hostname
        // of each string entry, skipping anything non-string or host-less.
        return data.reduce<string[]>((acc, gateway) => {
          if (typeof gateway !== 'string') {
            return acc;
          }
          const hn = fastUri.parse(gateway).host;
          if (hn) {
            acc.push(hn.trim());
          }
          return acc;
        }, []);
      }
    ),
    cdnDomainSetPromise,
    downloadDomainSetPromise,
    steamDomainSetPromise
  ]);

  // Move S3 domains to download domain set, since S3 files may be large
  appendArrayInPlace(downloadDomainSet, S3OSSDomains);
  appendArrayInPlace(downloadDomainSet, steamDomainSet);

  // we have whitelisted the crashlytics domain, and we also want to put it in CDN policy
  appendArrayInPlace(cdnDomainsList, CRASHLYTICS_WHITELIST);

  // Write both output rulesets concurrently.
  return Promise.all([
    new DomainsetOutput(span, 'cdn')
      .withTitle('Sukka\'s Ruleset - CDN Domains')
      .appendDescription(SHARED_DESCRIPTION)
      .appendDescription(
        '',
        'This file contains object storage and static assets CDN domains.'
      )
      .addFromDomainset(cdnDomainsList)
      // IPFS gateway hostnames are added as domain suffixes on top of the
      // static cdn.conf entries.
      .bulkAddDomainSuffix(IPFSDomains)
      .write(),
    new DomainsetOutput(span, 'download')
      .withTitle('Sukka\'s Ruleset - Large Files Hosting Domains')
      .appendDescription(SHARED_DESCRIPTION)
      .appendDescription(
        '',
        'This file contains domains for software updating & large file hosting.'
      )
      .addFromDomainset(downloadDomainSet)
      .write()
  ]);
});