build-cdn-download-conf.ts 4.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122
  1. import path from 'node:path';
  2. import { readFileIntoProcessedArray } from './lib/fetch-text-by-line';
  3. import { task } from './trace';
  4. import { SHARED_DESCRIPTION } from './constants/description';
  5. import { appendArrayInPlace } from 'foxts/append-array-in-place';
  6. import { SOURCE_DIR } from './constants/dir';
  7. import { DomainsetOutput } from './lib/rules/domainset';
  8. import { CRASHLYTICS_WHITELIST } from './constants/reject-data-source';
  9. import Worktank from 'worktank';
  /**
   * Starts reading one domainset file located under SOURCE_DIR.
   * The reads below begin at module evaluation and are awaited later,
   * inside buildCdnDownloadConf.
   */
  const readSourceDomainset = (relativePath: string) => readFileIntoProcessedArray(
    path.join(SOURCE_DIR, relativePath)
  );

  const cdnDomainsListPromise = readSourceDomainset('domainset/cdn.conf');
  const downloadDomainSetPromise = readSourceDomainset('domainset/download.conf');
  const steamDomainSetPromise = readSourceDomainset('domainset/game-download.conf');
  /**
   * Single-worker pool whose only method extracts S3-style object storage
   * hostnames from the public suffix list.
   */
  const pool = new Worktank({
    pool: {
      name: 'extract-s3-from-publicssuffix',
      size: 1 // The number of workers to keep in the pool, if more workers are needed they will be spawned up to this limit
    },
    worker: {
      autoAbort: 10000,
      autoTerminate: 20000, // The interval of milliseconds at which to check if the pool can be automatically terminated, to free up resources, workers will be spawned up again if needed
      autoInstantiate: true,
      methods: {
        /**
         * Fetches the public suffix list, loads every line into a HostnameTrie
         * and collects the matches under a fixed set of suffixes that start
         * with `s3-` or `s3.`, each returned with a leading dot.
         *
         * NOTE(review): everything this function needs is declared inside its
         * own body and dependencies are loaded through createRequire;
         * presumably worktank ships the function to the worker on its own, so
         * it must not close over module-scope bindings — confirm before
         * hoisting anything out.
         *
         * @param __filename - Path handed to `createRequire`, used as the base
         *   for resolving the `./lib/*` modules.
         * @returns The collected hostnames, in lookup order.
         */
        // eslint-disable-next-line object-shorthand -- worktank
        getS3OSSDomains: async function (__filename: string): Promise<string[]> {
          // TODO: createRequire is a temporary workaround for https://github.com/nodejs/node/issues/51956
          const nodeModule = (await import('node:module')).default;
          const workerRequire = nodeModule.createRequire(__filename);

          const { HostnameTrie } = workerRequire('./lib/trie') as typeof import('./lib/trie');
          const { fetchRemoteTextByLine } = workerRequire('./lib/fetch-text-by-line') as typeof import('./lib/fetch-text-by-line');

          // Load the whole public suffix list into the trie, one line at a time.
          const suffixTrie = new HostnameTrie();
          const publicSuffixLines = await fetchRemoteTextByLine('https://publicsuffix.org/list/public_suffix_list.dat', true);
          for await (const entry of publicSuffixLines) {
            suffixTrie.add(entry);
          }

          const s3Hosts: string[] = [];

          // Appends every trie match for `suffix` that starts with `s3-` or
          // `s3.`; when `skipChinaRegions` is set, matches containing `cn-`
          // are left out.
          const collectS3Hosts = (suffix: string, skipChinaRegions: boolean): void => {
            suffixTrie.find(suffix).forEach((host: string) => {
              if (!host.startsWith('s3-') && !host.startsWith('s3.')) {
                return;
              }
              if (skipChinaRegions && host.includes('cn-')) {
                return;
              }
              s3Hosts.push('.' + host);
            });
          };

          collectS3Hosts('.amazonaws.com', true);
          collectS3Hosts('.scw.cloud', false);
          collectS3Hosts('sakurastorage.jp', false);

          return s3Hosts;
        }
      }
    }
  });
  /**
   * Build task that writes the `cdn` and `download` domainset outputs.
   *
   * The S3-style hostnames come from the worker pool, which is terminated once
   * that call settles. They are appended to the download list together with
   * the game-download list, while CRASHLYTICS_WHITELIST is appended to the CDN
   * list. Both appends mutate the arrays resolved from the module-level reads.
   *
   * @returns A promise that settles once both outputs have been written.
   */
  export const buildCdnDownloadConf = task(require.main === module, __filename)(async (span) => {
    const s3DomainsPromise = span.traceChildAsync(
      'download public suffix list for s3',
      () => pool
        .exec('getS3OSSDomains', [__filename])
        .finally(() => pool.terminate())
    );

    const [s3Domains, cdnDomains, downloadDomains, steamDomains] = await Promise.all([
      s3DomainsPromise,
      cdnDomainsListPromise,
      downloadDomainSetPromise,
      steamDomainSetPromise
    ]);

    // Move S3 domains to download domain set, since S3 files may be large
    appendArrayInPlace(downloadDomains, s3Domains);
    appendArrayInPlace(downloadDomains, steamDomains);

    // we have whitelisted the crashlytics domain, and we also want to put it in CDN policy
    appendArrayInPlace(cdnDomains, CRASHLYTICS_WHITELIST);

    const cdnWrite = new DomainsetOutput(span, 'cdn')
      .withTitle('Sukka\'s Ruleset - CDN Domains')
      .appendDescription(SHARED_DESCRIPTION)
      .appendDescription('', 'This file contains object storage and static assets CDN domains.')
      .addFromDomainset(cdnDomains)
      .write();

    const downloadWrite = new DomainsetOutput(span, 'download')
      .withTitle('Sukka\'s Ruleset - Large Files Hosting Domains')
      .appendDescription(SHARED_DESCRIPTION)
      .appendDescription('', 'This file contains domains for software updating & large file hosting.')
      .addFromDomainset(downloadDomains)
      .write();

    return Promise.all([cdnWrite, downloadWrite]);
  });