// build-mitm-hostname.ts
  1. import { readFileByLine } from './lib/fetch-text-by-line';
  2. import pathFn from 'node:path';
  3. import table from 'table';
  4. import { fdir as Fdir } from 'fdir';
  5. import { green, yellow } from 'picocolors';
  6. import { processLineFromReadline } from './lib/process-line';
  7. import { getHostname } from 'tldts';
  8. import { OUTPUT_SURGE_DIR } from './constants/dir';
/**
 * Hostnames that are always part of the generated MITM hostname list.
 *
 * The script seeds its result set with these entries before adding the
 * hostnames it derives from `URL-REGEX` rules, so they are printed even when
 * no rule yields them. A `*` in an entry is expanded to `(.*)` when the
 * entries are later turned into regular expressions.
 *
 * Entries that are commented out are currently disabled.
 */
const PRESET_MITM_HOSTNAMES = [
// '*baidu.com',
'*.ydstatic.com',
// '*snssdk.com',
// '*musical.com',
// '*musical.ly',
// '*snssdk.ly',
'api.zhihu.com',
'www.zhihu.com',
'api.chelaile.net.cn',
'atrace.chelaile.net.cn',
'*.meituan.net',
'ctrl.playcvn.com',
'ctrl.playcvn.net',
'ctrl.zmzapi.com',
'ctrl.zmzapi.net',
'api.zhuishushenqi.com',
'b.zhuishushenqi.com',
'ggic.cmvideo.cn',
'ggic2.cmvideo.cn',
'mrobot.pcauto.com.cn',
'mrobot.pconline.com.cn',
'home.umetrip.com',
'discardrp.umetrip.com',
'startup.umetrip.com',
'dsp-x.jd.com',
'bdsp-x.jd.com'
];
  37. (async () => {
  38. const folderListPath = pathFn.resolve(OUTPUT_SURGE_DIR, 'List/');
  39. const rulesets = await new Fdir()
  40. .withFullPaths()
  41. .crawl(folderListPath)
  42. .withPromise();
  43. const urlRegexPaths: Array<{ origin: string, processed: string }> = [];
  44. await Promise.all(rulesets.map(async file => {
  45. const content = await processLineFromReadline(readFileByLine(file));
  46. urlRegexPaths.push(
  47. ...content
  48. .filter(i => (
  49. i.startsWith('URL-REGEX')
  50. && !i.includes('http://')
  51. ))
  52. .map(i => i.split(',')[1])
  53. .map(i => ({
  54. origin: i,
  55. processed: i
  56. .replaceAll('^https?://', '')
  57. .replaceAll('^https://', '')
  58. .replaceAll('^http://', '')
  59. .split('/')[0]
  60. .replaceAll(String.raw`\.`, '.')
  61. .replaceAll('.+', '*')
  62. .replaceAll(String.raw`\d`, '*')
  63. .replaceAll('([a-z])', '*')
  64. .replaceAll('[a-z]', '*')
  65. .replaceAll('([0-9])', '*')
  66. .replaceAll('[0-9]', '*')
  67. .replaceAll(/{.+?}/g, '')
  68. .replaceAll(/\*+/g, '*')
  69. }))
  70. );
  71. }));
  72. const mitmDomains = new Set(PRESET_MITM_HOSTNAMES); // Special case for parsed failed
  73. const parsedFailures = new Set();
  74. const dedupedUrlRegexPaths = [...new Set(urlRegexPaths)];
  75. dedupedUrlRegexPaths.forEach(i => {
  76. const result = getHostnameSafe(i.processed);
  77. if (result) {
  78. mitmDomains.add(result);
  79. } else {
  80. parsedFailures.add(`${i.origin} ${i.processed} ${result}`);
  81. }
  82. });
  83. const mitmDomainsRegExpArray = Array.from(mitmDomains)
  84. .slice()
  85. .filter(i => {
  86. return i.length > 3
  87. && !i.includes('.mp4') // Special Case
  88. && i !== '(www.)' // Special Case
  89. && !(i !== '*.meituan.net' && i.endsWith('.meituan.net'))
  90. && !i.startsWith('.')
  91. && !i.endsWith('.')
  92. && !i.endsWith('*');
  93. })
  94. .map(i => {
  95. return new RegExp(
  96. escapeRegExp(i)
  97. .replaceAll('{www or not}', '(www.)?')
  98. .replaceAll(String.raw`\*`, '(.*)')
  99. );
  100. });
  101. const parsedDomainsData: Array<[string, string]> = [];
  102. dedupedUrlRegexPaths.forEach(i => {
  103. const result = getHostnameSafe(i.processed);
  104. if (result) {
  105. if (matchWithRegExpArray(result, mitmDomainsRegExpArray)) {
  106. parsedDomainsData.push([green(result), i.origin]);
  107. } else {
  108. parsedDomainsData.push([yellow(result), i.origin]);
  109. }
  110. }
  111. });
  112. console.log('Mitm Hostnames:');
  113. console.log(`hostname = %APPEND% ${Array.from(mitmDomains).join(', ')}`);
  114. console.log('--------------------');
  115. console.log('Parsed Sucessed:');
  116. console.log(table.table(parsedDomainsData, {
  117. border: table.getBorderCharacters('void'),
  118. columnDefault: {
  119. paddingLeft: 0,
  120. paddingRight: 3
  121. },
  122. drawHorizontalLine: () => false
  123. }));
  124. console.log('--------------------');
  125. console.log('Parsed Failed');
  126. console.log(Array.from(parsedFailures).join('\n'));
  127. })();
/** Utility functions */
  129. function getHostnameSafe(input: string) {
  130. const res = getHostname(input);
  131. if (res && /[^\s\w*.-]/.test(res)) return null;
  132. return res;
  133. }
  134. function matchWithRegExpArray(input: string, regexps: RegExp[] = []) {
  135. for (const r of regexps) {
  136. if (r.test(input)) return true;
  137. }
  138. return false;
  139. }
  140. function escapeRegExp(string = '') {
  141. const reRegExpChar = /[$()*+.?[\\\]^{|}]/g;
  142. const reHasRegExpChar = new RegExp(reRegExpChar.source);
  143. return string && reHasRegExpChar.test(string)
  144. ? string.replaceAll(reRegExpChar, String.raw`\$&`)
  145. : string;
  146. }