build-mitm-hostname.ts 4.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158
  1. import { readFileByLine } from './lib/fetch-text-by-line';
  2. import Table from 'cli-table3';
  3. import { fdir as Fdir } from 'fdir';
  4. import { green, yellow } from 'picocolors';
  5. import { processLineFromReadline } from './lib/process-line';
  6. import { getHostname } from 'tldts';
  7. import { OUTPUT_SURGE_DIR } from './constants/dir';
  /**
   * Hostname patterns that are always seeded into the MITM hostname set,
   * before any hostname derived from the generated rule files is added.
   *
   * Entries are grouped one registrable domain per row; the element order is
   * significant because it determines the order of the printed hostname list.
   * Commented-out entries are disabled presets kept for reference.
   */
  const PRESET_MITM_HOSTNAMES = [
    // '*baidu.com',
    '*.ydstatic.com',
    // '*snssdk.com', '*musical.com', '*musical.ly', '*snssdk.ly',
    'api.zhihu.com', 'www.zhihu.com',
    'api.chelaile.net.cn', 'atrace.chelaile.net.cn',
    '*.meituan.net',
    'ctrl.playcvn.com', 'ctrl.playcvn.net',
    'ctrl.zmzapi.com', 'ctrl.zmzapi.net',
    'api.zhuishushenqi.com', 'b.zhuishushenqi.com',
    'ggic.cmvideo.cn', 'ggic2.cmvideo.cn',
    'mrobot.pcauto.com.cn',
    'mrobot.pconline.com.cn',
    'home.umetrip.com', 'discardrp.umetrip.com', 'startup.umetrip.com',
    'dsp-x.jd.com', 'bdsp-x.jd.com'
  ];
  /**
   * Reduce the regular expression of a `URL-REGEX` rule to a rough hostname
   * wildcard pattern.
   *
   * This is a best-effort textual rewrite, not a regex parser: the scheme
   * prefix and everything from the first `/` onwards are dropped, escaped dots
   * become literal dots, and common "any character" fragments collapse to `*`.
   *
   * @param urlRegex The regex portion of a `URL-REGEX` rule line.
   * @returns The simplified hostname pattern (may still be unparsable).
   */
  function urlRegexToHostnamePattern(urlRegex: string): string {
    return urlRegex
      .replaceAll('^https?://', '')
      .replaceAll('^https://', '')
      .replaceAll('^http://', '')
      .split('/')[0]
      .replaceAll(String.raw`\.`, '.')
      .replaceAll('.+', '*')
      .replaceAll(String.raw`\d`, '*')
      .replaceAll('([a-z])', '*')
      .replaceAll('[a-z]', '*')
      .replaceAll('([0-9])', '*')
      .replaceAll('[0-9]', '*')
      // Drop quantifiers such as `{2}` or `{1,3}`
      .replaceAll(/{.+?}/g, '')
      // Collapse runs of wildcards produced by the replacements above
      .replaceAll(/\*+/g, '*');
  }

  /**
   * Script entry point: crawl every file under OUTPUT_SURGE_DIR, extract the
   * `URL-REGEX` rules, derive a hostname pattern from each rule, and print
   *   1. the resulting MITM `hostname = %APPEND% ...` line,
   *   2. a table of rules whose hostname could be parsed (green when the
   *      hostname is matched by one of the usable MITM patterns, yellow
   *      otherwise),
   *   3. the rules whose hostname could not be parsed.
   */
  (async () => {
    const rulesets = await new Fdir()
      .withFullPaths()
      .crawl(OUTPUT_SURGE_DIR)
      .withPromise();

    // Collect the rules per file and flatten afterwards, so the final order
    // follows the crawl order instead of whichever file finishes reading first.
    const originsPerFile = await Promise.all(rulesets.map(async file => {
      const content = await processLineFromReadline(readFileByLine(file));
      return content
        .filter(line => line.startsWith('URL-REGEX') && !line.includes('http://'))
        .map(line => line.split(',')[1])
        // A `URL-REGEX` line without a comma has no pattern; skip it instead of
        // crashing on `undefined`.
        .filter((origin): origin is string => origin !== undefined);
    }));

    // Deduplicate by the original rule text. (A Set of freshly created object
    // literals never dedupes, because objects are compared by identity.)
    const processedByOrigin = new Map<string, string>();
    for (const origin of originsPerFile.flat()) {
      if (!processedByOrigin.has(origin)) {
        processedByOrigin.set(origin, urlRegexToHostnamePattern(origin));
      }
    }

    const mitmDomains = new Set(PRESET_MITM_HOSTNAMES); // Special case for parsed failed
    const parsedFailures = new Set<string>();
    const parsedRules: Array<{ origin: string, hostname: string }> = [];

    // Resolve each pattern once and reuse the result for both the hostname set
    // and the report table.
    for (const [origin, processed] of processedByOrigin) {
      const hostname = getHostnameSafe(processed);
      if (hostname) {
        mitmDomains.add(hostname);
        parsedRules.push({ origin, hostname });
      } else {
        parsedFailures.add(`${origin} ${processed} ${hostname}`);
      }
    }

    const mitmDomainsRegExpArray = Array.from(mitmDomains)
      .filter(i => {
        return i.length > 3
          && !i.includes('.mp4') // Special Case
          && i !== '(www.)' // Special Case
          // Only the wildcard entry is kept for meituan.net
          && !(i !== '*.meituan.net' && i.endsWith('.meituan.net'))
          && !i.startsWith('.')
          && !i.endsWith('.')
          && !i.endsWith('*');
      })
      .map(i => {
        return new RegExp(
          escapeRegExp(i)
            // escapeRegExp has already escaped the braces, so the placeholder
            // has to be matched in its escaped form.
            .replaceAll(String.raw`\{www or not\}`, '(www.)?')
            .replaceAll(String.raw`\*`, '(.*)')
        );
      });

    const parsedTable = new Table({
      head: ['Hostname Pattern', 'Original Rules']
    });
    for (const { origin, hostname } of parsedRules) {
      const colorize = matchWithRegExpArray(hostname, mitmDomainsRegExpArray) ? green : yellow;
      parsedTable.push([colorize(hostname), origin]);
    }

    console.log('Mitm Hostnames:');
    console.log(`hostname = %APPEND% ${Array.from(mitmDomains).join(', ')}`);
    console.log('--------------------');
    console.log('Parsed Sucessed:');
    console.log(parsedTable.toString());
    console.log('--------------------');
    console.log('Parsed Failed');
    console.log(Array.from(parsedFailures).join('\n'));
  })();
  /** Utility functions */
  /**
   * Extract a hostname with `getHostname` and discard results that contain
   * characters outside the accepted set.
   *
   * @param input A hostname-like pattern, possibly containing `*` wildcards.
   * @returns The value returned by `getHostname` when it is empty/null or made
   *   up only of word characters, whitespace, `*`, `.` and `-`; `null` when it
   *   contains any other character.
   */
  function getHostnameSafe(input: string) {
    const hostname = getHostname(input);
    // Nothing to validate: hand back the falsy result unchanged.
    if (!hostname) return hostname;
    // Any character outside the accepted set marks the result as unusable.
    return /[^\s\w*.-]/.test(hostname) ? null : hostname;
  }
  /**
   * Check whether at least one regular expression matches the input.
   *
   * @param input The string to test.
   * @param regexps Candidate expressions, tried in order until one matches.
   * @returns `true` on the first match, `false` when nothing matches or the
   *   list is empty.
   */
  function matchWithRegExpArray(input: string, regexps: RegExp[] = []) {
    return regexps.some(pattern => pattern.test(input));
  }
  /**
   * Escape the regular-expression metacharacters
   * `$ ( ) * + . ? [ \ ] ^ { | }` in a string by prefixing each with a
   * backslash, so the result can be embedded in a `RegExp` source literally.
   *
   * @param string The text to escape; defaults to the empty string.
   * @returns The escaped text, or the input unchanged when it is empty or
   *   contains no metacharacters.
   */
  function escapeRegExp(string = '') {
    if (!string) return string;
    // `$&` re-inserts the matched metacharacter after the added backslash.
    return string.replace(/[$()*+.?[\\\]^{|}]/g, String.raw`\$&`);
  }
  139. }