tools-dedupe-src.ts 3.2 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061
  1. import { fdir as Fdir } from 'fdir';
  2. import path from 'node:path';
  3. import fsp from 'node:fs/promises';
  4. import { SOURCE_DIR } from './constants/dir';
  5. import { readFileByLine } from './lib/fetch-text-by-line';
  6. const WHITELIST: string[] = ['packages.argotunnel.com', 'compass-ssl.xbox.com', 'static.agilebits.com', 'ntp.api.bz', 'softwareupdate.vmware.com', 'ftp.apache.org', 'ftp.cuhk.edu.hk', 'apache.belnet.be', 'mirrors.viethosting.com', 'apache.01link.hk', 'artfiles.org.org', 'mirror.synyx.de', 'apache.mediamirrors.org', 'wwwftp.ciril.fr', 'mirror.dkd.de', 'apache.javapipe.com', 'ftp.heikorichter.name', 'apache.panu.it', 'mirrors.supportex.net', 'apache.forsale.plus', 'apache.spinellicreations.com', 'ftp.itu.edu.tr', 'mirror1.spango.com', 'apache.oshte.net', 'mirrors.koehn.com', 'apache.dattatec.com', 'download.nextag.com', 'mirror.jre655.com', 'mirror.kiu.ac.ug', 'apache.cp.if.ua', 'mirrors.sorengard.com', 'ftp.igh.cnrs.fr', 'mirrors.hostingromania.ro', 'mirror.bhoovd.com', 'download.xs4all.nl', 'cpan.panu.it', 'cpan.nctu.edu.tw', 'mirror.serverbeheren.nl', 'cpan.llarian.net', 'cpan.etla.org', 'mirrors.syringanetworks.net', 'mirror.met.hu', 'cpan.cs.uu.nl', 'mirror.teklinks.com', 'mirror.rasanegar.com', 'ctan.kako-dev.de', 'ctan.ijs.si', 'mirrors.chevalier.io', 'mirror.yongbok.net', '1-mirrors.in.sahilister.net', '2-mirrors.in.sahilister.net', 'cc.uoc.gr', 'mirror.sergal.org', 'mirrors.mi.ras.ru', 'ctan.cs.uu.nl', 'mirrors.tripadvisor.com', 'gnu.spinellicreations.com', 'ftp.neowiz.com', 'mirror.rackdc.com', 'mirror.veriportal.com', 'ftp.pbone.net', 'downloader.cursor.sh', 'redrockdigimark.commirror', 'nimiq.by', 'aaxdetect.com', 'ctan.epst-tlemcen.dz', 'udahce.com', 'rs-staticart.ybcdn.net', 'doumpaq.com', 'c.medialytics.com', 'keybut.com', 'adserver.ubiyoo.com', 'kaspa-classic.com', 'minafacil.com', 'jiandanpool.com', 'xn--blockchan-n5a.com', 'alphax.pro', 'crypto-pool.online', 'bbqpool.org', 'nyxcoin.org', 'lpool.name', 'tsfpool.xyz', 'ltcmaster.xyz', '8282.space', 'myminingpool.uk', 'binance.live', 'mining.garden', 'scaleway.ovh', 'atpool.party', 'nimiq.by', 'binance.directory', 'onyx.run', 'lucky-pool.co.uk', 'ra7.xyz'];
  7. (async () => {
  8. const files = await new Fdir()
  9. .withFullPaths()
  10. .filter((filepath, isDirectory) => {
  11. if (isDirectory) return true;
  12. const extname = path.extname(filepath);
  13. return extname !== '.js' && extname !== '.ts';
  14. })
  15. .crawl(SOURCE_DIR)
  16. .withPromise();
  17. await Promise.all(files.map(dedupeFile));
  18. })();
  19. async function dedupeFile(file: string) {
  20. const set = new Set<string>();
  21. const result: string[] = [];
  22. for await (const line of readFileByLine(file)) {
  23. if (line.length === 0) {
  24. result.push(line);
  25. continue;
  26. }
  27. if (line[0] === '#') {
  28. result.push(line);
  29. continue;
  30. }
  31. if (set.has(line)) {
  32. continue;
  33. }
  34. // We can't use a trie here since we need to keep the order
  35. if (WHITELIST.some((item) => {
  36. if (item.length > line.length) {
  37. return false;
  38. }
  39. return (
  40. item === line // exact match
  41. || line.endsWith('.' + item) // the whitelist is considered as a domain-suffix
  42. );
  43. })) {
  44. continue;
  45. }
  46. set.add(line);
  47. result.push(line);
  48. }
  49. return fsp.writeFile(file, result.join('\n') + '\n');
  50. }