// tools-dedupe-src.ts
  1. import { fdir as Fdir } from 'fdir';
  2. import path from 'node:path';
  3. import fsp from 'node:fs/promises';
  4. import { SOURCE_DIR } from './constants/dir';
  5. import { readFileByLine } from './lib/fetch-text-by-line';
  6. import { processLine } from './lib/process-line';
  7. import { HostnameSmolTrie } from './lib/trie';
  8. const ENFORCED_WHITELIST = [
  9. 'hola.sk',
  10. 'hola.org',
  11. 'hola-shopping.com',
  12. 'mynextphone.io',
  13. 'iadmatapk.nosdn.127.net',
  14. 'httpdns.bilivideo.com',
  15. 'httpdns-v6.gslb.yy.com',
  16. 'twemoji.maxcdn.com',
  17. 'samsungcloudsolution.com',
  18. 'samsungcloudsolution.net',
  19. 'samsungqbe.com'
  20. ];
  21. const WHITELIST: string[] = ['.lightspeedmining.com', 'samsungqbe.com', '.zbeos.com', '.holashop.org', '.jdie.pl', '.sponsor.printondemandagency.com', '.bmcm.pw', '.vplay.life', '.hola.hk', '.peopleland.net', '.120bit.com', '.tekyboycrypto.xyz', '.rocketpool.pro', '.cryptoloot.pro', '.weminerpool.site', '.timg135.top', '.binance.associates', '.lafermedumineur.fr', '.goldencoin.online', '.hola.sk', '.hola.com.sg', '.acashtech.com', '.bitoreum.org', '.mixpools.org', '.decapool.net', '.taichicoin.org', '.luxxeeu.com'];
  22. (async () => {
  23. const files = await new Fdir()
  24. .withFullPaths()
  25. .filter((filepath, isDirectory) => {
  26. if (isDirectory) return true;
  27. const extname = path.extname(filepath);
  28. return extname !== '.js' && extname !== '.ts';
  29. })
  30. .crawl(SOURCE_DIR)
  31. .withPromise();
  32. const whiteTrie = new HostnameSmolTrie(WHITELIST);
  33. ENFORCED_WHITELIST.forEach((item) => whiteTrie.whitelist(item));
  34. const whitelist = whiteTrie.dump();
  35. await Promise.all(files.map(file => dedupeFile(file, whitelist)));
  36. })();
  37. async function dedupeFile(file: string, whitelist: string[]) {
  38. const set = new Set<string>();
  39. const result: string[] = [];
  40. for await (const l of readFileByLine(file)) {
  41. const line = processLine(l);
  42. if (!line) {
  43. if (l.startsWith('# $ skip_dedupe_src')) {
  44. return;
  45. }
  46. result.push(l);
  47. continue;
  48. }
  49. if (set.has(line)) {
  50. continue;
  51. }
  52. // We can't use a trie here since we need to keep the order
  53. if (whitelist.some((whiteItem) => isDomainSuffix(whiteItem, line))) {
  54. continue;
  55. }
  56. set.add(line);
  57. result.push(line);
  58. }
  59. return fsp.writeFile(file, result.join('\n') + '\n');
  60. }
  61. function isDomainSuffix(whiteItem: string, incomingItem: string) {
  62. const whiteIncludeDomain = whiteItem[0] === '.';
  63. whiteItem = whiteItem[0] === '.' ? whiteItem.slice(1) : whiteItem;
  64. if (whiteItem === incomingItem) {
  65. return true; // as long as exact match, we don't care if subdomain is included or not
  66. }
  67. if (whiteIncludeDomain) {
  68. return incomingItem.endsWith('.' + whiteItem);
  69. }
  70. return false;
  71. }