tools-dedupe-src.ts 2.9 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485
  1. import { fdir as Fdir } from 'fdir';
  2. import path from 'node:path';
  3. import fsp from 'node:fs/promises';
  4. import { SOURCE_DIR } from './constants/dir';
  5. import { readFileByLine } from './lib/fetch-text-by-line';
  6. import { processLine } from './lib/process-line';
  7. import { HostnameSmolTrie } from './lib/trie';
  /**
   * Hostnames that are fed one by one to `whiteTrie.whitelist()` after the trie
   * has been seeded from `WHITELIST` (see the main routine below).
   *
   * NOTE(review): presumably `whitelist()` removes these hostnames from the trie,
   * so that they are never stripped from the source files — confirm against
   * `HostnameSmolTrie`.
   */
  const ENFORCED_WHITELIST: string[] = [
    'hola.sk',
    'hola.org',
    'iadmatapk.nosdn.127.net',
    'httpdns.bilivideo.com',
    'httpdns-v6.gslb.yy.com',
    'twemoji.maxcdn.com',
    'samsungcloudsolution.com',
    'samsungcloudsolution.net',
    'samsungqbe.com'
  ];
  /**
   * Seed entries for the `HostnameSmolTrie` built in the main routine below.
   *
   * Entries use the same notation `isDomainSuffix` understands: a leading `.`
   * stands for "this domain and all of its subdomains", no leading dot means
   * an exact hostname only.
   * NOTE(review): assumes `HostnameSmolTrie#dump()` keeps that notation — confirm.
   */
  const WHITELIST: string[] = [
    '.us-api.samsungyosemite.com',
    '.api-hub.samsungyosemite.com',
    '.holashop.org',
    '.solopools.net',
    '.hola-shopping.com',
    '.100xbtc.com',
    '.brdtest.co',
    '.yelts.net',
    '.axepool.com',
    '.luxxeeu.com',
    '.etc-pool.com',
    '.alph2mine.com',
    'samsungcloudsolution.com',
    'samsungcloudsolution.net',
    'samsungqbe.com',
    'lgtvsdp.com',
    '.apextop.cc',
    '.p2p-south.xyz',
    '.r-pool.net',
    '.celcoin.io',
    '.gameforxe.eu',
    '.kipcoin.org',
    '.cryptonote.club',
    '.12level.com',
    '.piratenbucht.eu',
    '.minersmine.com',
    '.blockhunter.info',
    '.hola.com.sg',
    '.down.my0115.ru',
    '.js.my0115.ru',
    '.wmi.my0115.ru',
    '.statistic.date',
    '.pawpools.space',
    '.nimbocoin.com',
    '.hola.hk',
    '.hola.sk',
    '.solopools.org',
    '.minereasy.com',
    '.mynextphone.io',
    '.newzgames.com'
  ];
  /**
   * Script entry point: crawls SOURCE_DIR, builds the list of hostnames to drop,
   * and rewrites every crawled file through `dedupeFile`.
   *
   * Any failure (crawl, read, write) is reported explicitly and turns into a
   * non-zero exit code instead of being left as a floating, unhandled rejection.
   */
  (async () => {
    // Directories always pass the filter; files pass unless they are .js / .ts.
    const files = await new Fdir()
      .withFullPaths()
      .filter((filepath, isDirectory) => {
        if (isDirectory) return true;
        const extname = path.extname(filepath);
        return extname !== '.js' && extname !== '.ts';
      })
      .crawl(SOURCE_DIR)
      .withPromise();

    // Seed the trie from WHITELIST, then apply every ENFORCED_WHITELIST entry.
    // NOTE(review): presumably `whitelist()` removes the entry from the trie — confirm.
    const whiteTrie = new HostnameSmolTrie(WHITELIST);
    ENFORCED_WHITELIST.forEach((item) => whiteTrie.whitelist(item));
    const whitelist = whiteTrie.dump();

    // All files are processed concurrently; the first rejection rejects the whole run.
    await Promise.all(files.map(file => dedupeFile(file, whitelist)));
  })().catch((error: unknown) => {
    console.error(error);
    process.exitCode = 1;
  });
  /**
   * Rewrites `file` in place with duplicate and whitelisted entries removed.
   *
   * Each line is run through `processLine`:
   * - a falsy result means the raw line is kept verbatim, unless it starts with
   *   the `# $ skip_dedupe_src` directive, in which case the file is left
   *   untouched and the function returns without writing anything;
   * - a truthy result is kept (in first-seen order) only if it has not been seen
   *   before in this file and no `whitelist` item matches it via `isDomainSuffix`.
   *
   * @param file path of the file to read and overwrite
   * @param whitelist entries to drop; a leading `.` also covers subdomains
   * @returns resolves once the file is written, or immediately when skipped
   */
  async function dedupeFile(file: string, whitelist: string[]) {
    const seen = new Set<string>();
    const output: string[] = [];

    for await (const rawLine of readFileByLine(file)) {
      const entry = processLine(rawLine);

      if (entry) {
        // A linear scan over the whitelist is used (not a trie) so that the
        // original line order of the file is preserved.
        const keep = !seen.has(entry)
          && !whitelist.some((whiteItem) => isDomainSuffix(whiteItem, entry));
        if (keep) {
          seen.add(entry);
          output.push(entry);
        }
      } else if (rawLine.startsWith('# $ skip_dedupe_src')) {
        // Opt-out directive: abandon this file without modifying it.
        return;
      } else {
        output.push(rawLine);
      }
    }

    return fsp.writeFile(file, `${output.join('\n')}\n`);
  }
  /**
   * Tells whether `incomingItem` is covered by the whitelist entry `whiteItem`.
   *
   * - An exact match (after dropping a leading `.` from `whiteItem`) always counts.
   * - If `whiteItem` starts with `.`, anything ending in `.<domain>` counts too.
   *
   * @param whiteItem whitelist entry, optionally prefixed with `.`
   * @param incomingItem entry being tested
   * @returns `true` when `incomingItem` is covered by `whiteItem`
   */
  function isDomainSuffix(whiteItem: string, incomingItem: string) {
    const coversSubdomains = whiteItem.startsWith('.');
    const bareDomain = coversSubdomains ? whiteItem.slice(1) : whiteItem;

    return bareDomain === incomingItem
      || (coversSubdomains && incomingItem.endsWith(`.${bareDomain}`));
  }