build-mitm-hostname.js 5.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192
  1. const fs = require('fs');
  2. const { promises: fsPromises } = fs;
  3. const pathFn = require('path');
  4. const table = require('table');
  5. const listDir = require('@sukka/listdir');
  6. const { green, yellow } = require('picocolors');
  7. const PRESET_MITM_HOSTNAMES = [
  8. '*baidu.com',
  9. '*ydstatic.com',
  10. '*snssdk.com',
  11. '*musical.com',
  12. '*musical.ly',
  13. '*snssdk.ly',
  14. 'api.chelaile.net.cn',
  15. 'atrace.chelaile.net.cn',
  16. '*.meituan.net',
  17. 'ctrl.playcvn.com',
  18. 'ctrl.playcvn.net',
  19. 'ctrl.zmzapi.com',
  20. 'ctrl.zmzapi.net',
  21. 'api.zhuishushenqi.com',
  22. 'b.zhuishushenqi.com',
  23. '*.music.126.net',
  24. '*.prod.hosts.ooklaserver.net'
  25. ];
  26. (async () => {
  27. const folderListPath = pathFn.resolve(__dirname, '../List/');
  28. const rulesets = await listDir(folderListPath);
  29. let urlRegexPaths = [];
  30. urlRegexPaths.push(
  31. ...(await fsPromises.readFile(pathFn.join(__dirname, '../Modules/sukka_url_rewrite.sgmodule'), { encoding: 'utf-8' }))
  32. .split('\n')
  33. .filter(
  34. i => !i.startsWith('#')
  35. && !i.startsWith('[')
  36. )
  37. .map(i => i.split(' ')[0])
  38. .map(i => ({
  39. origin: i,
  40. processed: i
  41. .replaceAll('(www.)?', '{www or not}')
  42. .replaceAll('^https?://', '')
  43. .replaceAll('^https://', '')
  44. .replaceAll('^http://', '')
  45. .split('/')[0]
  46. .replaceAll('\\.', '.')
  47. .replaceAll('.+', '*')
  48. .replaceAll('(.*)', '*')
  49. }))
  50. );
  51. const bothWwwApexDomains = [];
  52. urlRegexPaths = urlRegexPaths.map(i => {
  53. if (!i.processed.includes('{www or not}')) return i;
  54. const d = i.processed.replace('{www or not}', '');
  55. bothWwwApexDomains.push({
  56. origin: i.origin,
  57. processed: `www.${d}`
  58. });
  59. return {
  60. origin: i.origin,
  61. processed: d
  62. };
  63. });
  64. urlRegexPaths.push(...bothWwwApexDomains);
  65. await Promise.all(rulesets.map(async file => {
  66. const content = (await fsPromises.readFile(pathFn.join(folderListPath, file), { encoding: 'utf-8' })).split('\n');
  67. urlRegexPaths.push(
  68. ...content
  69. .filter(i => i.startsWith('URL-REGEX'))
  70. .map(i => i.split(',')[1])
  71. .map(i => ({
  72. origin: i,
  73. processed: i
  74. .replaceAll('^https?://', '')
  75. .replaceAll('^https://', '')
  76. .replaceAll('^http://', '')
  77. .replaceAll('\\.', '.')
  78. .replaceAll('.+', '*')
  79. .replaceAll('\\d', '*')
  80. .replaceAll('([a-z])', '*')
  81. .replaceAll('[a-z]', '*')
  82. .replaceAll('([0-9])', '*')
  83. .replaceAll('[0-9]', '*')
  84. .replaceAll(/{.+?}/g, '')
  85. .replaceAll(/\*+/g, '*')
  86. }))
  87. );
  88. }));
  89. let mitmDomains = new Set(PRESET_MITM_HOSTNAMES); // Special case for parsed failed
  90. const parsedFailures = new Set();
  91. const dedupedUrlRegexPaths = [...new Set(urlRegexPaths)];
  92. dedupedUrlRegexPaths.forEach(i => {
  93. const result = parseDomain(i.processed);
  94. if (result.success) {
  95. mitmDomains.add(result.hostname.trim());
  96. } else {
  97. parsedFailures.add(i.origin);
  98. }
  99. });
  100. mitmDomains = [...mitmDomains].filter(i => {
  101. return i.length > 3
  102. && !i.includes('.mp4') // Special Case
  103. && i !== '(www.)' // Special Case
  104. && !(i !== '*baidu.com' && i.endsWith('baidu.com')) // Special Case
  105. && !(i !== '*.meituan.net' && i.endsWith('.meituan.net'))
  106. && !i.startsWith('.')
  107. && !i.endsWith('.')
  108. && !i.endsWith('*')
  109. });
  110. const mitmDomainsRegExpArray = mitmDomains.map(i => {
  111. return new RegExp(
  112. escapeRegExp(i)
  113. .replaceAll('{www or not}', '(www.)?')
  114. .replaceAll('\\*', '(.*)')
  115. )
  116. });
  117. const parsedDomainsData = [];
  118. dedupedUrlRegexPaths.forEach(i => {
  119. const result = parseDomain(i.processed);
  120. if (result.success) {
  121. if (matchWithRegExpArray(result.hostname.trim(), mitmDomainsRegExpArray)) {
  122. parsedDomainsData.push([green(result.hostname), i.origin]);
  123. } else {
  124. parsedDomainsData.push([yellow(result.hostname), i.origin]);
  125. }
  126. }
  127. });
  128. console.log('Mitm Hostnames:');
  129. console.log('hostname = %APPEND% ' + mitmDomains.join(', '));
  130. console.log('--------------------');
  131. console.log('Parsed Sucessed:');
  132. console.log(table.table(parsedDomainsData, {
  133. border: table.getBorderCharacters('void'),
  134. columnDefault: {
  135. paddingLeft: 0,
  136. paddingRight: 3
  137. },
  138. drawHorizontalLine: () => false
  139. }));
  140. console.log('--------------------');
  141. console.log('Parsed Failed');
  142. console.log([...parsedFailures].join('\n'));
  143. })();
  144. /** Util function */
  145. function parseDomain(input) {
  146. try {
  147. const url = new URL(`https://${input}`);
  148. return {
  149. success: true,
  150. hostname: url.hostname
  151. }
  152. } catch {
  153. return {
  154. success: false
  155. }
  156. }
  157. }
  158. function matchWithRegExpArray(input, regexps = []) {
  159. for (const r of regexps) {
  160. if (r.test(input)) return true;
  161. }
  162. return false;
  163. }
  164. function escapeRegExp(string = '') {
  165. const reRegExpChar = /[\\^$.*+?()[\]{}|]/g;
  166. const reHasRegExpChar = RegExp(reRegExpChar.source);
  167. return string && reHasRegExpChar.test(string)
  168. ? string.replace(reRegExpChar, '\\$&')
  169. : string;
  170. }