build-mitm-hostname.ts 5.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200
  1. import { readFileByLine } from './lib/fetch-text-by-line';
  2. import fsPromises from 'fs/promises';
  3. import pathFn from 'path';
  4. import table from 'table';
  5. import listDir from '@sukka/listdir';
  6. import { green, yellow } from 'picocolors';
  7. import { processLineFromReadline } from './lib/process-line';
  8. import { getHostname } from 'tldts';
  9. const PRESET_MITM_HOSTNAMES = [
  10. // '*baidu.com',
  11. '*.ydstatic.com',
  12. // '*snssdk.com',
  13. // '*musical.com',
  14. // '*musical.ly',
  15. // '*snssdk.ly',
  16. 'api.zhihu.com',
  17. 'www.zhihu.com',
  18. 'api.chelaile.net.cn',
  19. 'atrace.chelaile.net.cn',
  20. '*.meituan.net',
  21. 'ctrl.playcvn.com',
  22. 'ctrl.playcvn.net',
  23. 'ctrl.zmzapi.com',
  24. 'ctrl.zmzapi.net',
  25. 'api.zhuishushenqi.com',
  26. 'b.zhuishushenqi.com',
  27. 'ggic.cmvideo.cn',
  28. 'ggic2.cmvideo.cn',
  29. 'mrobot.pcauto.com.cn',
  30. 'mrobot.pconline.com.cn',
  31. 'home.umetrip.com',
  32. 'discardrp.umetrip.com',
  33. 'startup.umetrip.com',
  34. 'dsp-x.jd.com',
  35. 'bdsp-x.jd.com'
  36. ];
  37. (async () => {
  38. const folderListPath = pathFn.resolve(__dirname, '../List/');
  39. const rulesets = await listDir(folderListPath);
  40. let urlRegexPaths = [];
  41. urlRegexPaths.push(
  42. ...(await fsPromises.readFile(pathFn.join(__dirname, '../Modules/sukka_url_rewrite.sgmodule'), { encoding: 'utf-8' }))
  43. .split('\n')
  44. .filter(
  45. i => !i.startsWith('#')
  46. && !i.startsWith('[')
  47. )
  48. .map(i => i.split(' ')[0])
  49. .map(i => ({
  50. origin: i,
  51. processed: i
  52. .replaceAll('(www.)?', '{www or not}')
  53. .replaceAll('^https?://', '')
  54. .replaceAll('^https://', '')
  55. .replaceAll('^http://', '')
  56. .split('/')[0]
  57. .replaceAll('\\.', '.')
  58. .replaceAll('.+', '*')
  59. .replaceAll('(.*)', '*')
  60. }))
  61. );
  62. const bothWwwApexDomains: Array<{ origin: string, processed: string }> = [];
  63. urlRegexPaths = urlRegexPaths.map(i => {
  64. if (!i.processed.includes('{www or not}')) return i;
  65. const d = i.processed.replace('{www or not}', '');
  66. bothWwwApexDomains.push({
  67. origin: i.origin,
  68. processed: `www.${d}`
  69. });
  70. return {
  71. origin: i.origin,
  72. processed: d
  73. };
  74. });
  75. urlRegexPaths.push(...bothWwwApexDomains);
  76. await Promise.all(rulesets.map(async file => {
  77. const content = await processLineFromReadline(readFileByLine(pathFn.join(folderListPath, file)));
  78. urlRegexPaths.push(
  79. ...content
  80. .filter(i => (
  81. i.startsWith('URL-REGEX')
  82. && !i.includes('http://')
  83. ))
  84. .map(i => i.split(',')[1])
  85. .map(i => ({
  86. origin: i,
  87. processed: i
  88. .replaceAll('^https?://', '')
  89. .replaceAll('^https://', '')
  90. .replaceAll('^http://', '')
  91. .split('/')[0]
  92. .replaceAll('\\.', '.')
  93. .replaceAll('.+', '*')
  94. .replaceAll('\\d', '*')
  95. .replaceAll('([a-z])', '*')
  96. .replaceAll('[a-z]', '*')
  97. .replaceAll('([0-9])', '*')
  98. .replaceAll('[0-9]', '*')
  99. .replaceAll(/{.+?}/g, '')
  100. .replaceAll(/\*+/g, '*')
  101. }))
  102. );
  103. }));
  104. const mitmDomains = new Set(PRESET_MITM_HOSTNAMES); // Special case for parsed failed
  105. const parsedFailures = new Set();
  106. const dedupedUrlRegexPaths = [...new Set(urlRegexPaths)];
  107. dedupedUrlRegexPaths.forEach(i => {
  108. const result = getHostnameSafe(i.processed);
  109. if (result) {
  110. mitmDomains.add(result);
  111. } else {
  112. parsedFailures.add(`${i.origin} ${i.processed} ${result}`);
  113. }
  114. });
  115. const mitmDomainsRegExpArray = Array.from(mitmDomains)
  116. .slice()
  117. .filter(i => {
  118. return i.length > 3
  119. && !i.includes('.mp4') // Special Case
  120. && i !== '(www.)' // Special Case
  121. && !(i !== '*.meituan.net' && i.endsWith('.meituan.net'))
  122. && !i.startsWith('.')
  123. && !i.endsWith('.')
  124. && !i.endsWith('*');
  125. })
  126. .map(i => {
  127. return new RegExp(
  128. escapeRegExp(i)
  129. .replaceAll('{www or not}', '(www.)?')
  130. .replaceAll('\\*', '(.*)')
  131. );
  132. });
  133. const parsedDomainsData: Array<[string, string]> = [];
  134. dedupedUrlRegexPaths.forEach(i => {
  135. const result = getHostnameSafe(i.processed);
  136. if (result) {
  137. if (matchWithRegExpArray(result, mitmDomainsRegExpArray)) {
  138. parsedDomainsData.push([green(result), i.origin]);
  139. } else {
  140. parsedDomainsData.push([yellow(result), i.origin]);
  141. }
  142. }
  143. });
  144. console.log('Mitm Hostnames:');
  145. console.log(`hostname = %APPEND% ${Array.from(mitmDomains).join(', ')}`);
  146. console.log('--------------------');
  147. console.log('Parsed Sucessed:');
  148. console.log(table.table(parsedDomainsData, {
  149. border: table.getBorderCharacters('void'),
  150. columnDefault: {
  151. paddingLeft: 0,
  152. paddingRight: 3
  153. },
  154. drawHorizontalLine: () => false
  155. }));
  156. console.log('--------------------');
  157. console.log('Parsed Failed');
  158. console.log([...parsedFailures].join('\n'));
  159. })();
  160. /** Util function */
  161. function getHostnameSafe(input: string) {
  162. const res = getHostname(input);
  163. if (res && /[^\s\w*.-]/.test(res)) return null;
  164. return res;
  165. }
  166. function matchWithRegExpArray(input: string, regexps: RegExp[] = []) {
  167. for (const r of regexps) {
  168. if (r.test(input)) return true;
  169. }
  170. return false;
  171. }
  172. function escapeRegExp(string = '') {
  173. const reRegExpChar = /[$()*+.?[\\\]^{|}]/g;
  174. const reHasRegExpChar = new RegExp(reRegExpChar.source);
  175. return string && reHasRegExpChar.test(string)
  176. ? string.replaceAll(reRegExpChar, '\\$&')
  177. : string;
  178. }