create-file.ts 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587
  1. import path from 'node:path';
  2. import type { Span } from '../trace';
  3. import { surgeDomainsetToClashDomainset, surgeRulesetToClashClassicalTextRuleset } from './clash';
  4. import { ipCidrListToSingbox, surgeDomainsetToSingbox, surgeRulesetToSingbox } from './singbox';
  5. import { buildParseDomainMap, sortDomains } from './stable-sort-domain';
  6. import { createTrie } from './trie';
  7. import { invariant } from 'foxact/invariant';
  8. import { OUTPUT_CLASH_DIR, OUTPUT_SINGBOX_DIR, OUTPUT_SURGE_DIR } from '../constants/dir';
  9. import stringify from 'json-stringify-pretty-compact';
  10. import { appendArrayInPlace } from './append-array-in-place';
  11. import { nullthrow } from 'foxact/nullthrow';
  12. import createKeywordFilter from './aho-corasick';
  13. import picocolors from 'picocolors';
  14. import fs from 'node:fs';
  15. import { appendArrayFromSet, fastStringArrayJoin, writeFile } from './misc';
  16. import { readFileByLine } from './fetch-text-by-line';
  17. import { asyncWriteToStream } from './async-write-to-stream';
  18. const defaultSortTypeOrder = Symbol('defaultSortTypeOrder');
  19. const sortTypeOrder: Record<string | typeof defaultSortTypeOrder, number> = {
  20. DOMAIN: 1,
  21. 'DOMAIN-SUFFIX': 2,
  22. 'DOMAIN-KEYWORD': 10,
  23. // experimental domain wildcard support
  24. 'DOMAIN-WILDCARD': 20,
  25. 'DOMAIN-REGEX': 21,
  26. 'USER-AGENT': 30,
  27. 'PROCESS-NAME': 40,
  28. [defaultSortTypeOrder]: 50, // default sort order for unknown type
  29. 'URL-REGEX': 100,
  30. AND: 300,
  31. OR: 300,
  32. GEOIP: 400,
  33. 'IP-CIDR': 400,
  34. 'IP-CIDR6': 400
  35. };
  36. abstract class RuleOutput {
  37. protected domainTrie = createTrie<unknown>(null, true);
  38. protected domainKeywords = new Set<string>();
  39. protected domainWildcard = new Set<string>();
  40. protected userAgent = new Set<string>();
  41. protected processName = new Set<string>();
  42. protected processPath = new Set<string>();
  43. protected urlRegex = new Set<string>();
  44. protected ipcidr = new Set<string>();
  45. protected ipcidrNoResolve = new Set<string>();
  46. protected ipasn = new Set<string>();
  47. protected ipasnNoResolve = new Set<string>();
  48. protected ipcidr6 = new Set<string>();
  49. protected ipcidr6NoResolve = new Set<string>();
  50. protected geoip = new Set<string>();
  51. protected groipNoResolve = new Set<string>();
  52. // TODO: add sourceIpcidr
  53. // TODO: add sourcePort
  54. // TODO: add port
  55. protected otherRules: Array<[raw: string, orderWeight: number]> = [];
  56. protected abstract type: 'domainset' | 'non_ip' | 'ip';
  57. protected pendingPromise = Promise.resolve();
  58. static jsonToLines = (json: unknown): string[] => stringify(json).split('\n');
  59. static domainWildCardToRegex = (domain: string) => {
  60. let result = '^';
  61. for (let i = 0, len = domain.length; i < len; i++) {
  62. switch (domain[i]) {
  63. case '.':
  64. result += String.raw`\.`;
  65. break;
  66. case '*':
  67. result += '[a-zA-Z0-9-_.]*?';
  68. break;
  69. case '?':
  70. result += '[a-zA-Z0-9-_.]';
  71. break;
  72. default:
  73. result += domain[i];
  74. }
  75. }
  76. result += '$';
  77. return result;
  78. };
  79. constructor(
  80. protected readonly span: Span,
  81. protected readonly id: string
  82. ) {}
  83. protected title: string | null = null;
  84. withTitle(title: string) {
  85. this.title = title;
  86. return this;
  87. }
  88. protected description: string[] | readonly string[] | null = null;
  89. withDescription(description: string[] | readonly string[]) {
  90. this.description = description;
  91. return this;
  92. }
  93. protected date = new Date();
  94. withDate(date: Date) {
  95. this.date = date;
  96. return this;
  97. }
  98. protected apexDomainMap: Map<string, string> | null = null;
  99. protected subDomainMap: Map<string, string> | null = null;
  100. withDomainMap(apexDomainMap: Map<string, string>, subDomainMap: Map<string, string>) {
  101. this.apexDomainMap = apexDomainMap;
  102. this.subDomainMap = subDomainMap;
  103. return this;
  104. }
  105. addDomain(domain: string) {
  106. this.domainTrie.add(domain);
  107. return this;
  108. }
  109. addDomainSuffix(domain: string) {
  110. this.domainTrie.add(domain[0] === '.' ? domain : '.' + domain);
  111. return this;
  112. }
  113. bulkAddDomainSuffix(domains: string[]) {
  114. for (let i = 0, len = domains.length; i < len; i++) {
  115. this.addDomainSuffix(domains[i]);
  116. }
  117. return this;
  118. }
  119. addDomainKeyword(keyword: string) {
  120. this.domainKeywords.add(keyword);
  121. return this;
  122. }
  123. private async addFromDomainsetPromise(source: AsyncIterable<string> | Iterable<string> | string[]) {
  124. for await (const line of source) {
  125. if (line[0] === '.') {
  126. this.addDomainSuffix(line);
  127. } else {
  128. this.addDomain(line);
  129. }
  130. }
  131. }
  132. addFromDomainset(source: AsyncIterable<string> | Iterable<string> | string[]) {
  133. this.pendingPromise = this.pendingPromise.then(() => this.addFromDomainsetPromise(source));
  134. return this;
  135. }
  136. private async addFromRulesetPromise(source: AsyncIterable<string> | Iterable<string>) {
  137. for await (const line of source) {
  138. const splitted = line.split(',');
  139. const type = splitted[0];
  140. const value = splitted[1];
  141. const arg = splitted[2];
  142. switch (type) {
  143. case 'DOMAIN':
  144. this.addDomain(value);
  145. break;
  146. case 'DOMAIN-SUFFIX':
  147. this.addDomainSuffix(value);
  148. break;
  149. case 'DOMAIN-KEYWORD':
  150. this.addDomainKeyword(value);
  151. break;
  152. case 'DOMAIN-WILDCARD':
  153. this.domainWildcard.add(value);
  154. break;
  155. case 'USER-AGENT':
  156. this.userAgent.add(value);
  157. break;
  158. case 'PROCESS-NAME':
  159. if (value.includes('/') || value.includes('\\')) {
  160. this.processPath.add(value);
  161. } else {
  162. this.processName.add(value);
  163. }
  164. break;
  165. case 'URL-REGEX': {
  166. const [, ...rest] = splitted;
  167. this.urlRegex.add(rest.join(','));
  168. break;
  169. }
  170. case 'IP-CIDR':
  171. (arg === 'no-resolve' ? this.ipcidrNoResolve : this.ipcidr).add(value);
  172. break;
  173. case 'IP-CIDR6':
  174. (arg === 'no-resolve' ? this.ipcidr6NoResolve : this.ipcidr6).add(value);
  175. break;
  176. case 'IP-ASN':
  177. (arg === 'no-resolve' ? this.ipasnNoResolve : this.ipasn).add(value);
  178. break;
  179. case 'GEOIP':
  180. (arg === 'no-resolve' ? this.groipNoResolve : this.geoip).add(value);
  181. break;
  182. default:
  183. this.otherRules.push([line, type in sortTypeOrder ? sortTypeOrder[type] : sortTypeOrder[defaultSortTypeOrder]]);
  184. break;
  185. }
  186. }
  187. }
  188. addFromRuleset(source: AsyncIterable<string> | Iterable<string>) {
  189. this.pendingPromise = this.pendingPromise.then(() => this.addFromRulesetPromise(source));
  190. return this;
  191. }
  192. bulkAddCIDR4(cidr: string[]) {
  193. for (let i = 0, len = cidr.length; i < len; i++) {
  194. this.ipcidr.add(cidr[i]);
  195. }
  196. return this;
  197. }
  198. bulkAddCIDR4NoResolve(cidr: string[]) {
  199. for (let i = 0, len = cidr.length; i < len; i++) {
  200. this.ipcidrNoResolve.add(cidr[i]);
  201. }
  202. return this;
  203. }
  204. bulkAddCIDR6(cidr: string[]) {
  205. for (let i = 0, len = cidr.length; i < len; i++) {
  206. this.ipcidr6.add(cidr[i]);
  207. }
  208. return this;
  209. }
  210. bulkAddCIDR6NoResolve(cidr: string[]) {
  211. for (let i = 0, len = cidr.length; i < len; i++) {
  212. this.ipcidr6NoResolve.add(cidr[i]);
  213. }
  214. return this;
  215. }
  216. abstract write(): Promise<void>;
  217. }
  218. export class DomainsetOutput extends RuleOutput {
  219. protected type = 'domainset' as const;
  220. private $dumped: string[] | null = null;
  221. get dumped() {
  222. if (!this.$dumped) {
  223. const kwfilter = createKeywordFilter(this.domainKeywords);
  224. const results: string[] = [];
  225. const dumped = this.domainTrie.dump();
  226. for (let i = 0, len = dumped.length; i < len; i++) {
  227. const domain = dumped[i];
  228. if (!kwfilter(domain)) {
  229. results.push(domain);
  230. }
  231. }
  232. this.$dumped = results;
  233. }
  234. return this.$dumped;
  235. }
  236. calcDomainMap() {
  237. if (!this.apexDomainMap || !this.subDomainMap) {
  238. const { domainMap, subdomainMap } = buildParseDomainMap(this.dumped);
  239. this.apexDomainMap = domainMap;
  240. this.subDomainMap = subdomainMap;
  241. }
  242. }
  243. async write() {
  244. await this.pendingPromise;
  245. invariant(this.title, 'Missing title');
  246. invariant(this.description, 'Missing description');
  247. const sorted = sortDomains(this.dumped, this.apexDomainMap, this.subDomainMap);
  248. sorted.push('this_ruleset_is_made_by_sukkaw.ruleset.skk.moe');
  249. const surge = sorted;
  250. const clash = surgeDomainsetToClashDomainset(sorted);
  251. // TODO: Implement singbox directly using data
  252. const singbox = RuleOutput.jsonToLines(surgeDomainsetToSingbox(sorted));
  253. await Promise.all([
  254. compareAndWriteFile(
  255. this.span,
  256. withBannerArray(
  257. this.title,
  258. this.description,
  259. this.date,
  260. surge
  261. ),
  262. path.join(OUTPUT_SURGE_DIR, this.type, this.id + '.conf')
  263. ),
  264. compareAndWriteFile(
  265. this.span,
  266. withBannerArray(
  267. this.title,
  268. this.description,
  269. this.date,
  270. clash
  271. ),
  272. path.join(OUTPUT_CLASH_DIR, this.type, this.id + '.txt')
  273. ),
  274. compareAndWriteFile(
  275. this.span,
  276. singbox,
  277. path.join(OUTPUT_SINGBOX_DIR, this.type, this.id + '.json')
  278. )
  279. ]);
  280. }
  281. getStatMap() {
  282. invariant(this.dumped, 'Non dumped yet');
  283. invariant(this.apexDomainMap, 'Missing apex domain map');
  284. return Array.from(
  285. (
  286. nullthrow(this.dumped, 'Non dumped yet').reduce<Map<string, number>>((acc, cur) => {
  287. const suffix = this.apexDomainMap!.get(cur);
  288. if (suffix) {
  289. acc.set(suffix, (acc.get(suffix) ?? 0) + 1);
  290. }
  291. return acc;
  292. }, new Map())
  293. ).entries()
  294. )
  295. .filter(a => a[1] > 9)
  296. .sort(
  297. (a, b) => (b[1] - a[1]) || a[0].localeCompare(b[0])
  298. )
  299. .map(([domain, count]) => `${domain}${' '.repeat(100 - domain.length)}${count}`);
  300. }
  301. }
  302. export class IPListOutput extends RuleOutput {
  303. protected type = 'ip' as const;
  304. constructor(span: Span, id: string, private readonly clashUseRule = true) {
  305. super(span, id);
  306. }
  307. async write() {
  308. await this.pendingPromise;
  309. invariant(this.title, 'Missing title');
  310. invariant(this.description, 'Missing description');
  311. const sorted4 = Array.from(this.ipcidr);
  312. const sorted6 = Array.from(this.ipcidr6);
  313. const merged = appendArrayInPlace(appendArrayInPlace([], sorted4), sorted6);
  314. const surge = sorted4.map(i => `IP-CIDR,${i}`);
  315. appendArrayInPlace(surge, sorted6.map(i => `IP-CIDR6,${i}`));
  316. surge.push('DOMAIN,this_ruleset_is_made_by_sukkaw.ruleset.skk.moe');
  317. const clash = this.clashUseRule ? surge : merged;
  318. // TODO: Implement singbox directly using data
  319. const singbox = RuleOutput.jsonToLines(ipCidrListToSingbox(merged));
  320. await Promise.all([
  321. compareAndWriteFile(
  322. this.span,
  323. withBannerArray(
  324. this.title,
  325. this.description,
  326. this.date,
  327. surge
  328. ),
  329. path.join(OUTPUT_SURGE_DIR, this.type, this.id + '.conf')
  330. ),
  331. compareAndWriteFile(
  332. this.span,
  333. withBannerArray(
  334. this.title,
  335. this.description,
  336. this.date,
  337. clash
  338. ),
  339. path.join(OUTPUT_CLASH_DIR, this.type, this.id + '.txt')
  340. ),
  341. compareAndWriteFile(
  342. this.span,
  343. singbox,
  344. path.join(OUTPUT_SINGBOX_DIR, this.type, this.id + '.json')
  345. )
  346. ]);
  347. }
  348. }
  349. export class RulesetOutput extends RuleOutput {
  350. constructor(span: Span, id: string, protected type: 'non_ip' | 'ip') {
  351. super(span, id);
  352. }
  353. async write() {
  354. await this.pendingPromise;
  355. invariant(this.title, 'Missing title');
  356. invariant(this.description, 'Missing description');
  357. const results: string[] = [
  358. 'DOMAIN,this_ruleset_is_made_by_sukkaw.ruleset.skk.moe'
  359. ];
  360. const kwfilter = createKeywordFilter(this.domainKeywords);
  361. for (const domain of sortDomains(this.domainTrie.dump(), this.apexDomainMap, this.subDomainMap)) {
  362. if (kwfilter(domain)) {
  363. continue;
  364. }
  365. if (domain[0] === '.') {
  366. results.push(`DOMAIN-SUFFIX,${domain.slice(1)}`);
  367. } else {
  368. results.push(`DOMAIN,${domain}`);
  369. }
  370. }
  371. appendArrayFromSet(results, this.domainKeywords, i => `DOMAIN-KEYWORD,${i}`);
  372. appendArrayFromSet(results, this.domainWildcard, i => `DOMAIN-WILDCARD,${i}`);
  373. appendArrayFromSet(results, this.userAgent, i => `USER-AGENT,${i}`);
  374. appendArrayFromSet(results, this.processName, i => `PROCESS-NAME,${i}`);
  375. appendArrayFromSet(results, this.processPath, i => `PROCESS-NAME,${i}`);
  376. for (const [rule] of this.otherRules.sort((a, b) => a[1] - b[1])) {
  377. results.push(rule);
  378. }
  379. appendArrayFromSet(results, this.urlRegex, i => `URL-REGEX,${i}`);
  380. appendArrayFromSet(results, this.ipcidrNoResolve, i => `IP-CIDR,${i},no-resolve`);
  381. appendArrayFromSet(results, this.ipcidr6NoResolve, i => `IP-CIDR6,${i},no-resolve`);
  382. appendArrayFromSet(results, this.ipasnNoResolve, i => `IP-ASN,${i},no-resolve`);
  383. appendArrayFromSet(results, this.groipNoResolve, i => `GEOIP,${i},no-resolve`);
  384. appendArrayFromSet(results, this.ipcidr, i => `IP-CIDR,${i}`);
  385. appendArrayFromSet(results, this.ipcidr6, i => `IP-CIDR6,${i}`);
  386. appendArrayFromSet(results, this.ipasn, i => `IP-ASN,${i}`);
  387. appendArrayFromSet(results, this.geoip, i => `GEOIP,${i}`);
  388. for (const geoip of this.geoip) {
  389. results.push(`GEOIP,${geoip}`);
  390. }
  391. for (const geoip of this.groipNoResolve) {
  392. results.push(`GEOIP,${geoip},no-resolve`);
  393. }
  394. const surge = results;
  395. const clash = surgeRulesetToClashClassicalTextRuleset(results);
  396. // TODO: Implement singbox directly using data
  397. const singbox = RuleOutput.jsonToLines(surgeRulesetToSingbox(results));
  398. await Promise.all([
  399. compareAndWriteFile(
  400. this.span,
  401. withBannerArray(
  402. this.title,
  403. this.description,
  404. this.date,
  405. surge
  406. ),
  407. path.join(OUTPUT_SURGE_DIR, this.type, this.id + '.conf')
  408. ),
  409. compareAndWriteFile(
  410. this.span,
  411. withBannerArray(
  412. this.title,
  413. this.description,
  414. this.date,
  415. clash
  416. ),
  417. path.join(OUTPUT_CLASH_DIR, this.type, this.id + '.txt')
  418. ),
  419. compareAndWriteFile(
  420. this.span,
  421. singbox,
  422. path.join(OUTPUT_SINGBOX_DIR, this.type, this.id + '.json')
  423. )
  424. ]);
  425. }
  426. }
  427. function withBannerArray(title: string, description: string[] | readonly string[], date: Date, content: string[]) {
  428. return [
  429. '#########################################',
  430. `# ${title}`,
  431. `# Last Updated: ${date.toISOString()}`,
  432. `# Size: ${content.length}`,
  433. ...description.map(line => (line ? `# ${line}` : '#')),
  434. '#########################################',
  435. ...content,
  436. '################## EOF ##################'
  437. ];
  438. };
  439. export const fileEqual = async (linesA: string[], source: AsyncIterable<string>): Promise<boolean> => {
  440. if (linesA.length === 0) {
  441. return false;
  442. }
  443. let index = -1;
  444. for await (const lineB of source) {
  445. index++;
  446. if (index > linesA.length - 1) {
  447. if (index === linesA.length && lineB === '') {
  448. return true;
  449. }
  450. // The file becomes smaller
  451. return false;
  452. }
  453. const lineA = linesA[index];
  454. if (lineA[0] === '#' && lineB[0] === '#') {
  455. continue;
  456. }
  457. if (
  458. lineA[0] === '/'
  459. && lineA[1] === '/'
  460. && lineB[0] === '/'
  461. && lineB[1] === '/'
  462. && lineA[3] === '#'
  463. && lineB[3] === '#'
  464. ) {
  465. continue;
  466. }
  467. if (lineA !== lineB) {
  468. return false;
  469. }
  470. }
  471. if (index < linesA.length - 1) {
  472. // The file becomes larger
  473. return false;
  474. }
  475. return true;
  476. };
  477. export async function compareAndWriteFile(span: Span, linesA: string[], filePath: string) {
  478. let isEqual = true;
  479. const linesALen = linesA.length;
  480. if (fs.existsSync(filePath)) {
  481. isEqual = await fileEqual(linesA, readFileByLine(filePath));
  482. } else {
  483. console.log(`${filePath} does not exists, writing...`);
  484. isEqual = false;
  485. }
  486. if (isEqual) {
  487. console.log(picocolors.gray(picocolors.dim(`same content, bail out writing: ${filePath}`)));
  488. return;
  489. }
  490. await span.traceChildAsync(`writing ${filePath}`, async () => {
  491. // The default highwater mark is normally 16384,
  492. // So we make sure direct write to file if the content is
  493. // most likely less than 500 lines
  494. if (linesALen < 500) {
  495. return writeFile(filePath, fastStringArrayJoin(linesA, '\n') + '\n');
  496. }
  497. const writeStream = fs.createWriteStream(filePath);
  498. for (let i = 0; i < linesALen; i++) {
  499. const p = asyncWriteToStream(writeStream, linesA[i] + '\n');
  500. // eslint-disable-next-line no-await-in-loop -- stream high water mark
  501. if (p) await p;
  502. }
  503. await asyncWriteToStream(writeStream, '\n');
  504. writeStream.end();
  505. });
  506. }