// build-reject-domainset.js
  1. const { simpleGet } = require('./util-http-get');
  2. const { promises: fsPromises } = require('fs');
  3. const { resolve: pathResolve } = require('path');
  4. let cliProgress;
  5. try {
  6. cliProgress = require('cli-progress');
  7. } catch (e) {
  8. console.log('Dependencies not found');
  9. console.log('"npm i cli-progress" then try again!');
  10. console.error(e);
  11. process.exit(1);
  12. }
  /**
   * Download a hosts-format blocklist and collect the host names it lists.
   *
   * Each non-comment line is read as `<address> <hostname> [<alias>...]`.
   * Fields may be separated by any run of spaces or tabs, lines may end in
   * CRLF, and anything after a `#` is treated as a comment. Every host name on
   * a line becomes its own entry; duplicates are collapsed.
   *
   * @param {string | URL} hostsUrl - Location of the hosts file.
   * @param {boolean} [includeAllSubDomain=false] - When true, every entry is
   *   prefixed with `.`.
   * @returns {Promise<string[]>} Unique entries in first-seen order.
   */
  async function processHosts(hostsUrl, includeAllSubDomain = false) {
    const url = typeof hostsUrl === 'string' ? new URL(hostsUrl) : hostsUrl;
    const prefix = includeAllSubDomain ? '.' : '';

    /** @type {Set<string>} */
    const domainSets = new Set();

    // NOTE(review): the helper receives the URL object here, whereas
    // processFilterRules calls it as (hostname, path) — confirm that
    // simpleGet.https supports both call shapes.
    const body = await simpleGet.https(url);

    for (const rawLine of body.split('\n')) {
      // Cut off a full-line or trailing comment, then drop surrounding
      // whitespace (which also removes the '\r' of CRLF line endings).
      const commentAt = rawLine.indexOf('#');
      const line = (commentAt === -1 ? rawLine : rawLine.slice(0, commentAt)).trim();
      if (line === '') {
        continue;
      }

      // The first field is the address; every remaining field is a host name.
      const [, ...hostnames] = line.split(/\s+/);
      for (const hostname of hostnames) {
        domainSets.add(`${prefix}${hostname}`);
      }
    }

    return [...domainSets];
  }
  /**
   * Download an adblock-style filter list and extract the rules that target
   * one bare domain.
   *
   * Only rules of the exact shape `||domain^` (block) and `@@||domain^`
   * (exception) are used. Comment lines (`!` or `#`), rules with modifiers,
   * wildcards, paths, ports or any other non-domain character are ignored.
   * Lines are trimmed first, so CRLF line endings are handled.
   *
   * @param {string | URL} filterRulesUrl - Location of the filter list.
   * @returns {Promise<{ white: string[], black: string[] }>} Unique exception
   *   (`white`) and block (`black`) domains, each in first-seen order.
   */
  async function processFilterRules(filterRulesUrl) {
    const url = typeof filterRulesUrl === 'string' ? new URL(filterRulesUrl) : filterRulesUrl;

    /** @type {Set<string>} */
    const whitelistDomainSets = new Set();
    /** @type {Set<string>} */
    const blacklistDomainSets = new Set();

    // Any of these characters means the rule body is more than a bare domain.
    const notBareDomain = /[\s\/*^|$,:]/;

    // Keep the query string so URLs with parameters are fetched correctly.
    const body = await simpleGet.https(url.hostname, `${url.pathname}${url.search}`);

    for (const rawLine of body.split('\n')) {
      const line = rawLine.trim();
      if (line === '' || line.startsWith('#') || line.startsWith('!')) {
        continue;
      }
      if (!line.endsWith('^')) {
        continue;
      }

      let target;
      let domain;
      if (line.startsWith('@@||')) {
        target = whitelistDomainSets;
        domain = line.slice('@@||'.length, -1);
      } else if (line.startsWith('||')) {
        target = blacklistDomainSets;
        domain = line.slice('||'.length, -1);
      } else {
        continue;
      }

      if (domain !== '' && !notBareDomain.test(domain)) {
        target.add(domain);
      }
    }

    return {
      white: [...whitelistDomainSets],
      black: [...blacklistDomainSets]
    };
  }
  /**
   * Build script entry point.
   *
   * 1. Collects entries from several remote hosts files.
   * 2. Adds the hand-maintained entries from `List/domainset/reject_sukka.conf`.
   * 3. Adds block rules from several adblock filter lists as `.domain` entries.
   * 4. Removes entries that conflict with the filter lists' exception rules.
   * 5. Drops entries that contain a `DOMAIN-KEYWORD` keyword from
   *    `List/non_ip/reject.conf`, or that are already covered by a `.parent`
   *    entry in the set.
   * 6. Writes the result to `List/domainset/reject.conf`, one entry per line.
   *
   * Any failure is logged and the process exits with a non-zero code.
   */
  (async () => {
    /**
     * List a domain followed by each of its parent domains, e.g.
     * `a.b.c` -> `['a.b.c', 'b.c', 'c']`. Empty names are never returned.
     * @param {string} domain
     * @returns {string[]}
     */
    const domainAndParents = (domain) => {
      const names = domain === '' ? [] : [domain];
      let dotAt = domain.indexOf('.');
      while (dotAt !== -1) {
        const parent = domain.slice(dotAt + 1);
        if (parent !== '') {
          names.push(parent);
        }
        dotAt = domain.indexOf('.', dotAt + 1);
      }
      return names;
    };

    /** @param {string} entry */
    const stripLeadingDot = (entry) => (entry.startsWith('.') ? entry.slice(1) : entry);

    /**
     * Read a file relative to this script and return its non-empty lines,
     * trimmed (which also removes the '\r' of CRLF line endings).
     * @param {string} relativePath
     * @returns {Promise<string[]>}
     */
    const readLines = async (relativePath) => {
      const content = await fsPromises.readFile(pathResolve(__dirname, relativePath), { encoding: 'utf-8' });
      return content
        .split('\n')
        .map(line => line.trim())
        .filter(line => line !== '');
    };

    /** @type {Set<string>} */
    const domainSets = new Set();

    // Parse from remote hosts
    const hostsLists = await Promise.all([
      processHosts('https://pgl.yoyo.org/adservers/serverlist.php?hostformat=hosts&showintro=1&mimetype=plaintext', true),
      processHosts('https://raw.githubusercontent.com/hoshsadiq/adblock-nocoin-list/master/hosts.txt'),
      processHosts('https://cdn.jsdelivr.net/gh/neoFelhz/neohosts@gh-pages/full/hosts'),
      processHosts('https://adaway.org/hosts.txt')
    ]);
    for (const hosts of hostsLists) {
      for (const host of hosts) {
        if (host) {
          domainSets.add(host);
        }
      }
    }

    console.log(`Import ${domainSets.size} rules from hosts files!`);
    console.log(`Start importing rules from reject_sukka.conf!`);

    for (const line of await readLines('../List/domainset/reject_sukka.conf')) {
      if (!line.startsWith('#')) {
        domainSets.add(line);
      }
    }

    // Parse from AdGuard Filters
    /** @type {Set<string>} */
    const filterRuleWhitelistDomainSets = new Set();
    /** @type {Set<string>} */
    const filterRuleBlacklistDomainSets = new Set();

    const filterLists = await Promise.all([
      processFilterRules('https://easylist.to/easylist/easylist.txt'),
      processFilterRules('https://adguardteam.github.io/AdGuardSDNSFilter/Filters/filter.txt'),
      processFilterRules('https://raw.githubusercontent.com/AdguardTeam/FiltersRegistry/master/filters/filter_11_Mobile/filter.txt'),
      processFilterRules('https://raw.githubusercontent.com/AdguardTeam/FiltersRegistry/master/filters/filter_3_Spyware/filter.txt'),
      processFilterRules('https://raw.githubusercontent.com/AdguardTeam/FiltersRegistry/master/filters/filter_2_English/filter.txt'),
      processFilterRules('https://raw.githubusercontent.com/AdguardTeam/FiltersRegistry/master/filters/filter_224_Chinese/filter.txt')
    ]);
    for (const { white, black } of filterLists) {
      white.forEach(i => filterRuleWhitelistDomainSets.add(i));
      black.forEach(i => filterRuleBlacklistDomainSets.add(i));
    }

    for (const black of filterRuleBlacklistDomainSets) {
      domainSets.add(`.${black}`);
    }

    console.log(`Import ${filterRuleBlacklistDomainSets.size} rules from adguard filters!`);

    // Remove whitelist from the domain sets
    console.log(`Remove whitelist from the domain sets!`);

    // Matching is done on whole domain labels, never on raw substrings:
    // whitelisting "t.co" must not remove "microsoft.com".
    for (const entry of domainSets) {
      // The entry is the whitelisted domain itself or one of its subdomains.
      const names = domainAndParents(stripLeadingDot(entry));
      if (names.some(name => filterRuleWhitelistDomainSets.has(name))) {
        domainSets.delete(entry);
      }
    }
    for (const white of filterRuleWhitelistDomainSets) {
      // A ".parent" entry would also cover the whitelisted domain, so it has
      // to go as well.
      for (const name of domainAndParents(white)) {
        domainSets.delete(`.${name}`);
      }
    }

    // Read DOMAIN Keyword
    /** @type {Set<string>} */
    const domainKeywordsSet = new Set();
    for (const line of await readLines('../List/non_ip/reject.conf')) {
      if (line.startsWith('DOMAIN-KEYWORD')) {
        // Only the field right after the rule type is the keyword. An empty
        // keyword is skipped because it would match every entry.
        const keyword = (line.split(',')[1] ?? '').trim();
        if (keyword !== '') {
          domainKeywordsSet.add(keyword);
        }
      }
    }

    // Dedupe domainSets
    console.log(`Start deduping!`);

    const keywords = [...domainKeywordsSet];

    /**
     * True when another entry of the form ".name" in the set already covers
     * this entry (its own domain for a non-dotted entry, or any parent).
     * @param {string} entry
     */
    const isCoveredByWildcard = (entry) => {
      const bare = stripLeadingDot(entry);
      if (!entry.startsWith('.') && domainSets.has(`.${bare}`)) {
        return true;
      }
      return domainAndParents(bare)
        .slice(1)
        .some(parent => domainSets.has(`.${parent}`));
    };

    const bar2 = new cliProgress.SingleBar({}, cliProgress.Presets.shades_classic);
    const entries = [...domainSets];
    /** @type {string[]} */
    let deduped;

    bar2.start(entries.length, 0);
    try {
      // domainSets is left untouched here so the wildcard lookups do not
      // depend on the order in which entries are visited.
      deduped = entries.filter(entry => {
        bar2.increment();
        if (keywords.some(keyword => entry.includes(keyword))) {
          return false;
        }
        return !isCoveredByWildcard(entry);
      });
    } finally {
      bar2.stop();
    }

    await fsPromises.writeFile(pathResolve(__dirname, '../List/domainset/reject.conf'), deduped.join('\n'));
  })().catch(err => {
    console.error(err);
    process.exitCode = 1;
  });