tools-dedupe-src.ts 2.3 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273
  1. import { fdir as Fdir } from 'fdir';
  2. import path from 'node:path';
  3. import fsp from 'node:fs/promises';
  4. import { SOURCE_DIR } from './constants/dir';
  5. import { readFileByLine } from './lib/fetch-text-by-line';
  6. import { processLine } from './lib/process-line';
  /**
   * Domains that are subtracted from WHITELIST before deduplication runs:
   * any WHITELIST entry that equals, or is a subdomain of, one of these is
   * dropped from the effective list handed to dedupeFile.
   */
  const ENFORCED_WHITELIST = [
    'hola.sk',
    'hola.org',
    'iadmatapk.nosdn.127.net',
    'httpdns.bilivideo.com',
    'httpdns-v6.gslb.yy.com',
    'twemoji.maxcdn.com'
  ];

  /**
   * Domain suffixes used as the removal list: after the ENFORCED_WHITELIST
   * entries are filtered out, a processed line that equals or is a subdomain
   * of one of the remaining entries is omitted from the rewritten file.
   */
  const WHITELIST: string[] = [
    'ton.local.twitter.com',
    'prod.msocdn.com',
    'twemoji.maxcdn.com',
    'img.urlnode.com',
    'ipfsgate.com',
    'googleplay.pro',
    'iadmatapk.nosdn.127.net',
    'hola-shopping.com',
    'brdtest.co',
    'mynextphone.io',
    'hola.hk',
    'holashop.org',
    'hola.sk',
    'hola.com.sg',
    'c.medialytics.com',
    'adstats.mgc-games.com',
    'search.mgc-games.com',
    'kissdoujin.com',
    'newminersage.com',
    'trossmining.de',
    'hashncash.net',
    'microsolt.ru',
    'moneropool.ru',
    'hashforcash.us',
    'bitcoinn.biz',
    'webmining.co',
    'lamba.top',
    'httpdns.bilivideo.com',
    'httpdns-v6.gslb.yy.com',
    'k-cdn.depot.dev',
    'li-cdn.com'
  ];
  // Script entry point: crawl SOURCE_DIR for every file that is not a .js/.ts
  // file, build the effective removal list, and dedupe all files concurrently.
  (async () => {
    const isScriptFile = (filepath: string) => {
      const ext = path.extname(filepath);
      return ext === '.js' || ext === '.ts';
    };

    const crawler = new Fdir()
      .withFullPaths()
      // Directories always pass the filter; files pass unless they are scripts.
      .filter((filepath, isDirectory) => isDirectory || !isScriptFile(filepath));
    const sourceFiles = await crawler.crawl(SOURCE_DIR).withPromise();

    // Keep only the WHITELIST entries that no ENFORCED_WHITELIST entry covers
    // (by equality or as a parent domain).
    const effectiveWhitelist = WHITELIST.filter(
      (candidate) => !ENFORCED_WHITELIST.some((enforced) => isDomainSuffix(enforced, candidate))
    );

    await Promise.all(sourceFiles.map((file) => dedupeFile(file, effectiveWhitelist)));
  })();
  /**
   * Rewrites `file` in place, dropping repeated and whitelisted entries while
   * preserving the order of everything that is kept.
   *
   * Each raw line is passed through `processLine`. When that yields a falsy
   * value the raw line is copied through unchanged — unless it starts with
   * `# $ skip_dedupe_src`, in which case the function returns immediately and
   * the file is not written. Otherwise the processed value is kept only the
   * first time it is seen and only if no `whitelist` entry equals it or is a
   * parent domain of it.
   *
   * @param file Path of the file to read and overwrite.
   * @param whitelist Domain suffixes whose matching lines are omitted.
   * @returns The pending write, or `undefined` when the skip marker is found.
   */
  async function dedupeFile(file: string, whitelist: string[]) {
    const seen = new Set<string>();
    const output: string[] = [];

    for await (const rawLine of readFileByLine(file)) {
      const parsed = processLine(rawLine);

      if (parsed) {
        // A linear scan is used instead of a trie because output order must be kept.
        const isDuplicate = seen.has(parsed);
        if (isDuplicate || whitelist.some((suffix) => isDomainSuffix(suffix, parsed))) {
          continue;
        }
        seen.add(parsed);
        output.push(parsed);
        continue;
      }

      // Opt-out marker: leave the file exactly as it is on disk.
      if (rawLine.startsWith('# $ skip_dedupe_src')) {
        return;
      }
      output.push(rawLine);
    }

    const content = output.join('\n') + '\n';
    return fsp.writeFile(file, content);
  }
  /**
   * Tests whether `domain` is covered by `suffixRule`, i.e. it is either the
   * same string or a subdomain of it (ends with `'.' + suffixRule`).
   *
   * @param suffixRule The domain suffix to match against.
   * @param domain The domain being tested.
   * @returns `true` when `domain` equals `suffixRule` or ends with `.` followed by it.
   */
  function isDomainSuffix(suffixRule: string, domain: string): boolean {
    // A rule longer than the domain can neither equal it nor be a
    // dot-prefixed suffix of it, so bail out before any string comparison.
    if (suffixRule.length > domain.length) {
      return false;
    }
    return suffixRule === domain || domain.endsWith('.' + suffixRule);
  }
  58. }