// tools-dedupe-src.ts (2.7 KB)
  1. import { fdir as Fdir } from 'fdir';
  2. import path from 'node:path';
  3. import fsp from 'node:fs/promises';
  4. import { SOURCE_DIR } from './constants/dir';
  5. import { readFileByLine } from './lib/fetch-text-by-line';
  6. import { processLine } from './lib/process-line';
  7. import { HostnameSmolTrie, HostnameTrie } from './lib/trie';
  8. import { task } from './trace';
  /**
   * Hostnames passed one by one to `trie.whitelist(...)` after the whitelist trie
   * has been seeded with `WHITELIST`.
   * NOTE(review): the exact effect of `whitelist()` is defined in ./lib/trie —
   * confirm there before adding or removing entries.
   */
  const ENFORCED_WHITELIST: string[] = [
    'hola.sk',
    'hola.org',
    'hola-shopping.com',
    'mynextphone.io',
    'iadmatapk.nosdn.127.net',
    'httpdns.bilivideo.com',
    'httpdns-v6.gslb.yy.com',
    'twemoji.maxcdn.com',
    'samsungcloudsolution.com',
    'samsungcloudsolution.net',
    'samsungqbe.com',
    'ntp.api.bz',
    'cdn.tuk.dev'
  ];

  /**
   * Initial contents of the whitelist trie. While deduping, any processed line
   * for which that trie's `has()` returns true is dropped from the source file.
   */
  const WHITELIST: string[] = [
    'ntp.api.bz',
    'httpdns.bilivideo.com',
    'httpdns-v6.gslb.yy.com',
    'cdn.tuk.dev',
    'cpan.catalyst.net.nz',
    'adx.yixin.im',
    'vocadb-analytics.fly.dev',
    'ad.yixin.im',
    'cdn.javsts.com',
    'files.webfe.shopee.io'
  ];
  // Script entry point, registered through `task(...)`: crawl SOURCE_DIR, build
  // the whitelist trie once, then run `dedupeFile` on every crawled file
  // concurrently. Each stage is wrapped in a child trace span.
  task(require.main === module, __filename)(async (span) => {
    // Directories always pass the filter; files pass unless their extension is
    // `.js` or `.ts`.
    const keepEntry = (entryPath: string, isDirectory: boolean): boolean => {
      if (isDirectory) return true;
      const ext = path.extname(entryPath);
      return !(ext === '.js' || ext === '.ts');
    };

    const sourceFiles = await span.traceChildAsync(
      'crawl thru all files',
      () => new Fdir()
        .withFullPaths()
        .filter(keepEntry)
        .crawl(SOURCE_DIR)
        .withPromise()
    );

    const whiteTrie = span.traceChildSync('build whitelist trie', () => {
      // Seed with WHITELIST first, then apply every enforced entry on top.
      const built = new HostnameSmolTrie(WHITELIST);
      for (const host of ENFORCED_WHITELIST) {
        built.whitelist(host);
      }
      return built;
    });

    // One traced dedupe job per file; all jobs share the same whitelist trie.
    const jobs = sourceFiles.map(
      (file) => span.traceChildAsync('dedupe ' + file, () => dedupeFile(file, whiteTrie))
    );
    await Promise.all(jobs);
  });
  /**
   * Rewrites `file` in place with duplicate and whitelisted entries removed.
   *
   * Each line is passed through `processLine`. Lines for which it returns a
   * falsy value (comments and blank lines, per the original author's note) are
   * kept verbatim. Any other line is kept, in its processed form, only the first
   * time it is seen and only if `whitelist.has()` does not match it.
   *
   * If a non-entry line starts with `# $ skip_dedupe_src`, the function returns
   * immediately and the file is left untouched.
   *
   * @param file      Path of the file to read and overwrite.
   * @param whitelist Trie of entries that must be dropped from the file.
   * @returns The promise from `fsp.writeFile`, or `undefined` when skipped.
   */
  async function dedupeFile(file: string, whitelist: HostnameSmolTrie) {
    const kept: string[] = [];
    const seen = new HostnameTrie();

    for await (const rawLine of readFileByLine(file)) {
      const entry = processLine(rawLine);

      if (!entry) {
        // Opt-out marker: abandon the rewrite entirely, nothing is written.
        if (rawLine.startsWith('# $ skip_dedupe_src')) {
          return;
        }
        // keep all comments and blank lines
        kept.push(rawLine);
      } else if (!seen.has(entry) && !whitelist.has(entry)) {
        // First occurrence of a non-whitelisted entry: remember it and keep it.
        seen.add(entry);
        kept.push(entry);
      }
      // Otherwise the entry is a duplicate or whitelisted and is dropped.
    }

    return fsp.writeFile(file, kept.join('\n') + '\n');
  }
  // Commented-out helper, not called anywhere in this file:
  //
  // function isDomainSuffix(whiteItem: string, incomingItem: string) {
  //   const whiteIncludeDomain = whiteItem[0] === '.';
  //   whiteItem = whiteItem[0] === '.' ? whiteItem.slice(1) : whiteItem;
  //   if (whiteItem === incomingItem) {
  //     return true; // on an exact match it does not matter whether subdomains are included
  //   }
  //   if (whiteIncludeDomain) {
  //     return incomingItem.endsWith('.' + whiteItem);
  //   }
  //   return false;
  // }