build-mitm-hostname.js 6.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239
  1. const fs = require('fs');
  2. const { promises: fsPromises } = fs;
  3. const pathFn = require('path');
  4. let table;
  5. const PRESET_MITM_HOSTNAMES = [
  6. '*baidu.com',
  7. '*ydstatic.com',
  8. '*snssdk.com',
  9. '*musical.com',
  10. '*musical.ly',
  11. '*snssdk.ly',
  12. 'api.chelaile.net.cn',
  13. 'atrace.chelaile.net.cn',
  14. '*.meituan.net',
  15. 'ctrl.playcvn.com',
  16. 'ctrl.playcvn.net',
  17. 'ctrl.zmzapi.com',
  18. 'ctrl.zmzapi.net',
  19. 'api.zhuishushenqi.com',
  20. 'b.zhuishushenqi.com',
  21. '*.music.126.net',
  22. '*.prod.hosts.ooklaserver.net'
  23. ];
  24. try {
  25. table = require('table');
  26. } catch (e) {
  27. console.log('Dependency "table" not found');
  28. console.log('"npm i table" then try again!');
  29. process.exit(1);
  30. }
  31. (async () => {
  32. const folderListPath = pathFn.resolve(__dirname, '../List/');
  33. const rulesets = await listDir(folderListPath);
  34. let urlRegexPaths = [];
  35. urlRegexPaths.push(
  36. ...(await fsPromises.readFile(pathFn.join(__dirname, '../Modules/sukka_url_rewrite.sgmodule'), { encoding: 'utf-8' }))
  37. .split('\n')
  38. .filter(
  39. i => !i.startsWith('#')
  40. && !i.startsWith('[')
  41. )
  42. .map(i => i.split(' ')[0])
  43. .map(i => ({
  44. origin: i,
  45. processed: i
  46. .replaceAll('(www.)?', '{www or not}')
  47. .replaceAll('^https?://', '')
  48. .replaceAll('^https://', '')
  49. .replaceAll('^http://', '')
  50. .split('/')[0]
  51. .replaceAll('\\.', '.')
  52. .replaceAll('.+', '*')
  53. .replaceAll('(.*)', '*')
  54. }))
  55. );
  56. const bothWwwApexDomains = [];
  57. urlRegexPaths = urlRegexPaths.map(i => {
  58. if (!i.processed.includes('{www or not}')) return i;
  59. const d = i.processed.replace('{www or not}', '');
  60. bothWwwApexDomains.push({
  61. origin: i.origin,
  62. processed: `www.${d}`
  63. });
  64. return {
  65. origin: i.origin,
  66. processed: d
  67. };
  68. });
  69. urlRegexPaths.push(...bothWwwApexDomains);
  70. await Promise.all(rulesets.map(async file => {
  71. const content = (await fsPromises.readFile(pathFn.join(folderListPath, file), { encoding: 'utf-8' })).split('\n');
  72. urlRegexPaths.push(
  73. ...content
  74. .filter(i => i.startsWith('URL-REGEX'))
  75. .map(i => i.split(',')[1])
  76. .map(i => ({
  77. origin: i,
  78. processed: i
  79. .replaceAll('^https?://', '')
  80. .replaceAll('^https://', '')
  81. .replaceAll('^http://', '')
  82. .replaceAll('\\.', '.')
  83. .replaceAll('.+', '*')
  84. .replaceAll('\\d', '*')
  85. .replaceAll('([a-z])', '*')
  86. .replaceAll('[a-z]', '*')
  87. .replaceAll('([0-9])', '*')
  88. .replaceAll('[0-9]', '*')
  89. .replaceAll(/{.+?}/g, '')
  90. .replaceAll(/\*+/g, '*')
  91. }))
  92. );
  93. }));
  94. let mitmDomains = new Set(PRESET_MITM_HOSTNAMES); // Special case for parsed failed
  95. const parsedFailures = new Set();
  96. const dedupedUrlRegexPaths = [...new Set(urlRegexPaths)];
  97. dedupedUrlRegexPaths.forEach(i => {
  98. const result = parseDomain(i.processed);
  99. if (result.success) {
  100. mitmDomains.add(result.hostname.trim());
  101. } else {
  102. parsedFailures.add(i.origin);
  103. }
  104. });
  105. mitmDomains = [...mitmDomains].filter(i => {
  106. return i.length > 3
  107. && !i.includes('.mp4') // Special Case
  108. && i !== '(www.)' // Special Case
  109. && !(i !== '*baidu.com' && i.endsWith('baidu.com')) // Special Case
  110. && !(i !== '*.meituan.net' && i.endsWith('.meituan.net'))
  111. && !i.startsWith('.')
  112. && !i.endsWith('.')
  113. && !i.endsWith('*')
  114. });
  115. const mitmDomainsRegExpArray = mitmDomains.map(i => {
  116. return new RegExp(
  117. escapeRegExp(i)
  118. .replaceAll('{www or not}', '(www.)?')
  119. .replaceAll('\\*', '(.*)')
  120. )
  121. });
  122. const parsedDomainsData = [];
  123. dedupedUrlRegexPaths.forEach(i => {
  124. const result = parseDomain(i.processed);
  125. if (result.success) {
  126. if (matchWithRegExpArray(result.hostname.trim(), mitmDomainsRegExpArray)) {
  127. parsedDomainsData.push([green(result.hostname), i.origin]);
  128. } else {
  129. parsedDomainsData.push([yellow(result.hostname), i.origin]);
  130. }
  131. }
  132. });
  133. console.log('Mitm Hostnames:');
  134. console.log('hostname = %APPEND% ' + mitmDomains.join(', '));
  135. console.log('--------------------');
  136. console.log('Parsed Sucessed:');
  137. console.log(table.table(parsedDomainsData, {
  138. border: table.getBorderCharacters('void'),
  139. columnDefault: {
  140. paddingLeft: 0,
  141. paddingRight: 3
  142. },
  143. drawHorizontalLine: () => false
  144. }));
  145. console.log('--------------------');
  146. console.log('Parsed Failed');
  147. console.log([...parsedFailures].join('\n'));
  148. })();
/** Utility functions */
  150. function green(...args) {
  151. return `\u001b[32m${args.join(' ')}\u001b[0m`;
  152. }
  153. function yellow(...args) {
  154. return `\u001b[33m${args.join(' ')}\u001b[0m`;
  155. }
  156. function parseDomain(input) {
  157. try {
  158. const url = new URL(`https://${input}`);
  159. return {
  160. success: true,
  161. hostname: url.hostname
  162. }
  163. } catch {
  164. return {
  165. success: false
  166. }
  167. }
  168. }
  169. function matchWithRegExpArray(input, regexps = []) {
  170. for (const r of regexps) {
  171. if (r.test(input)) return true;
  172. }
  173. return false;
  174. }
  175. function escapeRegExp(string = '') {
  176. const reRegExpChar = /[\\^$.*+?()[\]{}|]/g;
  177. const reHasRegExpChar = RegExp(reRegExpChar.source);
  178. return string && reHasRegExpChar.test(string)
  179. ? string.replace(reRegExpChar, '\\$&')
  180. : string;
  181. }
  182. function listDir(path, options) {
  183. const results = [];
  184. options = Object.assign({ ignoreHidden: true, ignorePattern: null }, options);
  185. return listDirWalker(path, results, '', options).then(() => results);
  186. }
  187. function listDirWalker(path, results, parent, options) {
  188. const promises = [];
  189. return readAndFilterDir(path, options).then(items => {
  190. items.forEach(item => {
  191. const currentPath = pathFn.join(parent, item.name);
  192. if (item.isDirectory()) {
  193. promises.push(listDirWalker(pathFn.join(path, item.name), results, currentPath, options));
  194. }
  195. else {
  196. results.push(currentPath);
  197. }
  198. });
  199. }).then(() => Promise.all(promises));
  200. }
  201. function readAndFilterDir(path, options) {
  202. const { ignoreHidden = true, ignorePattern } = options;
  203. return fs.promises.readdir(path, Object.assign(Object.assign({}, options), { withFileTypes: true }))
  204. .then(results => {
  205. if (ignoreHidden) {
  206. results = results.filter(({ name }) => !name.startsWith('.'));
  207. }
  208. if (ignorePattern) {
  209. results = results.filter(({ name }) => !ignorePattern.test(name));
  210. }
  211. return results;
  212. });
  213. }