build-reject-domainset.js 6.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222
  1. const { simpleGet } = require('./util-http-get');
  2. const { promises: fsPromises } = require('fs');
  3. const { resolve: pathResolve } = require('path');
  4. let cliProgress;
  5. try {
  6. cliProgress = require('cli-progress');
  7. } catch (e) {
  8. console.log('Dependencies not found');
  9. console.log('"npm i cli-progress" then try again!');
  10. console.error(e);
  11. process.exit(1);
  12. }
  /**
   * Download a hosts-format list and collect the host names it maps.
   *
   * Each non-comment line is expected to look like `<address> <host> [<host> ...]`.
   * The first field (the address) is discarded; every following field becomes one
   * entry in the result. Duplicates are collapsed.
   *
   * @param {string | URL} hostsUrl location of the hosts file
   * @param {boolean} [includeAllSubDomain=false] when true every entry is prefixed with "."
   * @returns {Promise<string[]>} unique entries in first-seen order
   */
  async function processHosts(hostsUrl, includeAllSubDomain = false) {
    const url = typeof hostsUrl === 'string' ? new URL(hostsUrl) : hostsUrl;

    /** @type Set<string> */
    const domainSets = new Set();

    const body = await simpleGet.https(url);

    for (const rawLine of body.split('\n')) {
      // Drop full-line and trailing "# ..." comments, then surrounding whitespace
      // (this also removes the "\r" left behind by CRLF line endings).
      const commentStart = rawLine.indexOf('#');
      const line = (commentStart === -1 ? rawLine : rawLine.slice(0, commentStart)).trim();
      if (line === '') {
        continue;
      }

      // Fields may be separated by tabs or by runs of spaces, so split on any
      // whitespace instead of a single ' '. A line holding only an address
      // yields no hosts and therefore adds nothing (previously it added '' or '.').
      const [, ...hosts] = line.split(/\s+/);
      for (const host of hosts) {
        domainSets.add(includeAllSubDomain ? `.${host}` : host);
      }
    }

    return [...domainSets];
  }
  /**
   * Download an adblock-style filter list and extract its plain domain rules.
   *
   * Only two rule shapes are recognised; everything else is ignored:
   *   - `||domain^`   -> added to the `black` set
   *   - `@@||domain^` -> added to the `white` set
   * Comment lines (`#`, `!`) and any rule containing `*`, `/` or `$` are skipped.
   * The `white` set always starts with 'localhost' and 'analytics.google.com'.
   *
   * @param {string | URL} filterRulesUrl location of the filter list
   * @returns {Promise<{ white: Set<string>, black: Set<string> }>}
   */
  async function processFilterRules(filterRulesUrl) {
    const url = typeof filterRulesUrl === 'string' ? new URL(filterRulesUrl) : filterRulesUrl;

    /** @type Set<string> */
    const whitelistDomainSets = new Set(['localhost', 'analytics.google.com']);
    /** @type Set<string> */
    const blacklistDomainSets = new Set();

    // NOTE(review): only hostname and pathname are forwarded, so a query string
    // on filterRulesUrl would be dropped. processHosts passes the URL object
    // instead — confirm which call shape simpleGet.https expects.
    const body = await simpleGet.https(url.hostname, url.pathname);

    for (const rawLine of body.split('\n')) {
      // Trim first: with CRLF line endings the raw line ends in "\r", which
      // would make the endsWith('^') test below fail for every rule.
      const line = rawLine.trim();
      if (
        line === ''
        || line.startsWith('#')
        || line.startsWith('!')
        || line.includes('*')
        || line.includes('/')
        || line.includes('$')
        || !line.endsWith('^')
      ) {
        continue;
      }

      const isException = line.startsWith('@@||');
      if (!isException && !line.startsWith('||')) {
        continue;
      }

      // Strip exactly the leading marker and the trailing '^'.
      const domain = line.slice(isException ? 4 : 2, -1);
      // An empty domain (from "||^") or one that still contains rule syntax is
      // not a usable host name; an empty string in particular would match every
      // entry in substring-based comparisons.
      if (domain === '' || domain.includes('^') || domain.includes('|')) {
        continue;
      }

      if (isException) {
        whitelistDomainSets.add(domain);
      } else {
        blacklistDomainSets.add(domain);
      }
    }

    return {
      white: whitelistDomainSets,
      black: blacklistDomainSets
    };
  }
  /**
   * Build ../List/domainset/reject.conf.
   *
   * Steps:
   *   1. collect entries from the remote hosts files and the local reject_sukka.conf,
   *   2. add every blacklisted filter-list domain as a ".domain" entry,
   *   3. drop entries that overlap a whitelist domain or a DOMAIN-KEYWORD,
   *   4. drop entries already covered by a remaining ".suffix" entry,
   *   5. write the survivors, one per line.
   */
  (async () => {
    /** @type Set<string> */
    const domainSets = new Set();

    // Parse from remote hosts
    const hostsLists = await Promise.all([
      processHosts('https://pgl.yoyo.org/adservers/serverlist.php?hostformat=hosts&showintro=1&mimetype=plaintext', true),
      processHosts('https://raw.githubusercontent.com/hoshsadiq/adblock-nocoin-list/master/hosts.txt'),
      processHosts('https://cdn.jsdelivr.net/gh/neoFelhz/neohosts@gh-pages/full/hosts'),
      processHosts('https://adaway.org/hosts.txt')
    ]);
    for (const hosts of hostsLists) {
      for (const host of hosts) {
        const entry = host ? host.trim() : '';
        if (entry !== '') {
          domainSets.add(entry);
        }
      }
    }
    console.log(`Import ${domainSets.size} rules from hosts files!`);

    // Parse the locally maintained list
    const localRules = await fsPromises.readFile(
      pathResolve(__dirname, '../List/domainset/reject_sukka.conf'),
      { encoding: 'utf-8' }
    );
    for (const line of localRules.split('\n')) {
      const entry = line.trim();
      // Comments, indented lines and blank lines are not entries.
      if (entry === '' || line.startsWith('#') || line.startsWith(' ')) {
        continue;
      }
      domainSets.add(entry);
    }
    console.log(`Import rules from reject_sukka.conf!`);

    // Parse from AdGuard Filters
    /** @type Set<string> */
    const filterRuleWhitelistDomainSets = new Set();
    /** @type Set<string> */
    const filterRuleBlacklistDomainSets = new Set();
    const filterResults = await Promise.all([
      processFilterRules('https://easylist.to/easylist/easylist.txt'),
      processFilterRules('https://adguardteam.github.io/AdGuardSDNSFilter/Filters/filter.txt'),
      processFilterRules('https://raw.githubusercontent.com/AdguardTeam/FiltersRegistry/master/filters/filter_11_Mobile/filter.txt'),
      processFilterRules('https://raw.githubusercontent.com/AdguardTeam/FiltersRegistry/master/filters/filter_3_Spyware/filter.txt'),
      processFilterRules('https://raw.githubusercontent.com/AdguardTeam/FiltersRegistry/master/filters/filter_2_English/filter.txt'),
      processFilterRules('https://raw.githubusercontent.com/AdguardTeam/FiltersRegistry/master/filters/filter_224_Chinese/filter.txt')
    ]);
    for (const { white, black } of filterResults) {
      for (const domain of white) {
        filterRuleWhitelistDomainSets.add(domain);
      }
      for (const domain of black) {
        filterRuleBlacklistDomainSets.add(domain);
      }
    }
    // Blacklisted filter domains are stored with a leading "." .
    for (const black of filterRuleBlacklistDomainSets) {
      domainSets.add(`.${black}`);
    }
    console.log(`Import ${filterRuleBlacklistDomainSets.size} black rules from adguard filters!`);
    console.log(`Import ${filterRuleWhitelistDomainSets.size} white rules from adguard filters!`);

    // Read DOMAIN Keyword
    const domainKeywordsSet = new Set();
    const keywordRules = await fsPromises.readFile(
      pathResolve(__dirname, '../List/non_ip/reject.conf'),
      { encoding: 'utf-8' }
    );
    for (const line of keywordRules.split('\n')) {
      if (line.startsWith('DOMAIN-KEYWORD')) {
        // Everything after the first comma is treated as the keyword.
        const [, ...keywords] = line.split(',');
        domainKeywordsSet.add(keywords.join(',').trim());
      }
    }
    console.log(`Import ${domainKeywordsSet.size} black keywords!`);

    // Dedupe domainSets
    console.log(`Start deduping!`);

    // ''.includes / includes('') is always true, so an empty pattern would wipe
    // the whole list; discard such patterns up front.
    const whitelistPatterns = [...filterRuleWhitelistDomainSets].filter((pattern) => pattern !== '');
    const keywordPatterns = [...domainKeywordsSet].filter((pattern) => pattern !== '');

    /** Substring overlap in either direction, as used for whitelist and keyword matching. */
    const overlaps = (domain, pattern) => domain.includes(pattern) || pattern.includes(domain);

    /**
     * True when another entry of domainSets that starts with "." covers `domain`:
     * either `.${domain}` itself, or a proper suffix of `domain` that begins at a ".".
     * Such a suffix must start at an index >= 1 (index 0 would be the entry itself),
     * so looking up each of those suffixes is equivalent to scanning the whole set.
     */
    const isCoveredByWildcard = (domain) => {
      if (domainSets.has(`.${domain}`)) {
        return true;
      }
      for (let dot = domain.indexOf('.', 1); dot !== -1; dot = domain.indexOf('.', dot + 1)) {
        if (domainSets.has(domain.slice(dot))) {
          return true;
        }
      }
      return false;
    };

    const bar2 = new cliProgress.SingleBar({}, cliProgress.Presets.shades_classic);
    bar2.start(domainSets.size, 0);
    try {
      // Pass 1: remove everything that overlaps a whitelist domain or a keyword.
      // This must finish before the coverage pass: otherwise an explicit host can
      // be dropped as "covered" by a ".suffix" entry that is itself removed here
      // later, leaving neither in the output (the result then depends on
      // insertion order).
      for (const domain of domainSets) {
        bar2.increment();
        const excluded = whitelistPatterns.some((pattern) => overlaps(domain, pattern))
          || keywordPatterns.some((pattern) => overlaps(domain, pattern));
        if (excluded) {
          domainSets.delete(domain);
        }
      }

      // Pass 2: remove entries already covered by a surviving ".suffix" entry.
      // Deleting while iterating is safe here: if a covering entry is itself
      // removed, it was removed because a shorter ".suffix" entry covers both.
      for (const domain of domainSets) {
        if (isCoveredByWildcard(domain)) {
          domainSets.delete(domain);
        }
      }
    } finally {
      // Always release the progress bar, even if deduping throws.
      bar2.stop();
    }

    await fsPromises.writeFile(
      pathResolve(__dirname, '../List/domainset/reject.conf'),
      `${[...domainSets].join('\n')}\n`
    );
  })().catch((err) => {
    // Surface download / file-system failures and signal them through the exit code.
    console.error(err);
    process.exitCode = 1;
  });