// tools-dedupe-src.ts
  1. import { fdir as Fdir } from 'fdir';
  2. import path from 'node:path';
  3. import fsp from 'node:fs/promises';
  4. import { SOURCE_DIR } from './constants/dir';
  5. import { readFileByLine } from './lib/fetch-text-by-line';
  6. import { processLine } from './lib/process-line';
  7. import { HostnameSmolTrie } from './lib/trie';
  8. import { task } from './trace';
  /**
   * Hostnames that are passed one by one to `HostnameSmolTrie#whitelist()`
   * on the whitelist trie right after it has been built from `WHITELIST`.
   * NOTE(review): presumably this removes them from that trie so they are
   * never dropped from the source files — confirm against `HostnameSmolTrie`.
   */
  const ENFORCED_WHITELIST = [
    'hola.sk',
    'hola.org',
    'hola-shopping.com',
    'mynextphone.io',
    'iadmatapk.nosdn.127.net',
    'httpdns.bilivideo.com',
    'httpdns-v6.gslb.yy.com',
    'twemoji.maxcdn.com',
    'samsungcloudsolution.com',
    'samsungcloudsolution.net',
    'samsungqbe.com',
    'ntp.api.bz',
    'cdn.tuk.dev',
    'vocadb-analytics.fly.dev',
    'img.vim-cn.com'
  ];

  /**
   * Hostnames used to seed the whitelist trie. While deduping, any entry for
   * which `whitelist.has(entry)` is true is dropped from the source file.
   */
  const WHITELIST: string[] = [
    'httpdns.bilivideo.com',
    'ntp.api.bz',
    'httpdns-v6.gslb.yy.com',
    'img.vim-cn.com',
    'img.jjbb.me',
    'thingproxy.freeboard.io',
    'assets.chess24.com',
    'cdn.chess24.com',
    'static-assets.freeanimehentai.net',
    'static.javcdn.info',
    'cdn.vidible.tv',
    'it.apache.contactlab.it',
    'mirror.netinch.com',
    'de.freedif.org',
    'league1.maoyuncloud.cn',
    'spl.ztvx8.com',
    'zls.xz6d.com',
    'iadmatapk.nosdn.127.net',
    'show.buzzcity.net',
    'click.buzzcity.net',
    'apps.buzzcity.net',
    'content-cdn.y2mate.com',
    'images.voguehk.com',
    'cdn.amh.moe',
    'statics.mnnews.tw'
  ];
  /**
   * Script entry point, registered through `task(...)`.
   *
   * 1. Crawls `SOURCE_DIR` and collects the full path of every file whose
   *    extension is neither `.js` nor `.ts`.
   * 2. Builds the whitelist trie from `WHITELIST`, then calls `whitelist()` on
   *    it for every `ENFORCED_WHITELIST` entry.
   * 3. Runs `dedupeFile` on all collected files concurrently, each inside its
   *    own child trace span.
   */
  task(require.main === module, __filename)(async (span) => {
    const sourceFiles = await span.traceChildAsync('crawl thru all files', () => {
      const crawler = new Fdir()
        .withFullPaths()
        .filter((entryPath, isDir) => {
          // Directories always pass the filter; only file extensions are checked.
          if (isDir) {
            return true;
          }
          const ext = path.extname(entryPath);
          return !(ext === '.js' || ext === '.ts');
        })
        .crawl(SOURCE_DIR);

      return crawler.withPromise();
    });

    const whiteTrie = span.traceChildSync('build whitelist trie', () => {
      const built = new HostnameSmolTrie(WHITELIST);
      for (const domain of ENFORCED_WHITELIST) {
        built.whitelist(domain);
      }
      return built;
    });

    const pending = sourceFiles.map(
      (file) => span.traceChildAsync('dedupe ' + file, () => dedupeFile(file, whiteTrie))
    );
    await Promise.all(pending);
  });
  /**
   * Rewrites `file` in place with duplicate and whitelisted entries removed.
   *
   * Each line is run through `processLine`. Lines for which it returns a falsy
   * value are kept verbatim, and are also inspected for two directives:
   * - `# $ skip_dedupe_src`: stop immediately and leave the file untouched.
   * - `# $ dedupe_use_trie_contains`: from this point on, detect duplicates
   *   with `HostnameSmolTrie#contains` instead of `HostnameSmolTrie#has`.
   *
   * @param file - Path of the file to read and then overwrite.
   * @param whitelist - Trie of entries that must be dropped from the file.
   * @returns A promise that settles once the file has been written, or
   *   resolves without writing when the skip directive is found.
   */
  async function dedupeFile(file: string, whitelist: HostnameSmolTrie) {
    const kept: string[] = [];
    const seen = new HostnameSmolTrie();

    // eslint-disable-next-line @typescript-eslint/unbound-method -- .call
    let isDuplicate = HostnameSmolTrie.prototype.has;

    for await (const rawLine of readFileByLine(file)) {
      const entry = processLine(rawLine);

      if (entry) {
        // Duplicate check runs before the whitelist check, as in the original.
        const shouldDrop = isDuplicate.call(seen, entry) || whitelist.has(entry);
        if (!shouldDrop) {
          seen.add(entry);
          kept.push(entry);
        }
        continue;
      }

      if (rawLine.startsWith('# $ skip_dedupe_src')) {
        return;
      }
      if (rawLine.startsWith('# $ dedupe_use_trie_contains')) {
        // eslint-disable-next-line @typescript-eslint/unbound-method -- .call
        isDuplicate = HostnameSmolTrie.prototype.contains;
      }

      // keep all comments and blank lines
      kept.push(rawLine);
    }

    const output = kept.join('\n') + '\n';
    return fsp.writeFile(file, output);
  }
  // function isDomainSuffix(whiteItem: string, incomingItem: string) {
  //   const whiteIncludeDomain = whiteItem[0] === '.';
  //   whiteItem = whiteItem[0] === '.' ? whiteItem.slice(1) : whiteItem;
  //   if (whiteItem === incomingItem) {
  //     return true; // as long as exact match, we don't care if subdomain is included or not
  //   }
  //   if (whiteIncludeDomain) {
  //     return incomingItem.endsWith('.' + whiteItem);
  //   }
  //   return false;
  // }