parse-filter.js 7.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253
  1. const { fetchWithRetry } = require('./fetch-retry');
  2. const { normalizeDomain } = require('./is-domain-loose');
  3. const DEBUG_DOMAIN_TO_FIND = null; // example.com | null
  4. let foundDebugDomain = false;
  5. const warnOnceUrl = new Set();
  6. const warnOnce = (url, isWhite, ...message) => {
  7. const key = `${url}${isWhite ? 'white' : 'black'}`;
  8. if (warnOnceUrl.has(key)) {
  9. return;
  10. }
  11. warnOnceUrl.add(key);
  12. console.warn(url, isWhite ? '(white)' : '(black)', ...message);
  13. }
  14. /**
  15. * @param {string | URL} domainListsUrl
  16. */
  17. async function processDomainLists (domainListsUrl) {
  18. if (typeof domainListsUrl === 'string') {
  19. domainListsUrl = new URL(domainListsUrl);
  20. }
  21. /** @type Set<string> */
  22. const domainSets = new Set();
  23. /** @type string[] */
  24. const domains = (await (await fetchWithRetry(domainListsUrl)).text()).split('\n');
  25. domains.forEach(line => {
  26. if (
  27. line.startsWith('#')
  28. || line.startsWith('!')
  29. || line.startsWith(' ')
  30. || line === ''
  31. || line.startsWith('\r')
  32. || line.startsWith('\n')
  33. ) {
  34. return;
  35. }
  36. const domainToAdd = line.trim();
  37. if (DEBUG_DOMAIN_TO_FIND && domainToAdd.includes(DEBUG_DOMAIN_TO_FIND)) {
  38. warnOnce(domainListsUrl.toString(), false, DEBUG_DOMAIN_TO_FIND);
  39. foundDebugDomain = true;
  40. }
  41. domainSets.add(domainToAdd);
  42. });
  43. return [...domainSets];
  44. }
  45. /**
  46. * @param {string | URL} hostsUrl
  47. */
  48. async function processHosts (hostsUrl, includeAllSubDomain = false) {
  49. console.time(` - processHosts: ${hostsUrl}`);
  50. if (typeof hostsUrl === 'string') {
  51. hostsUrl = new URL(hostsUrl);
  52. }
  53. /** @type Set<string> */
  54. const domainSets = new Set();
  55. /** @type string[] */
  56. const hosts = (await (await fetchWithRetry(hostsUrl)).text()).split('\n');
  57. hosts.forEach(line => {
  58. if (line.includes('#')) {
  59. return;
  60. }
  61. if (line.startsWith(' ') || line.startsWith('\r') || line.startsWith('\n') || line.trim() === '') {
  62. return;
  63. }
  64. const [, ...domains] = line.split(' ');
  65. const _domain = domains.join(' ').trim();
  66. if (DEBUG_DOMAIN_TO_FIND && _domain.includes(DEBUG_DOMAIN_TO_FIND)) {
  67. warnOnce(hostsUrl.toString(), false, DEBUG_DOMAIN_TO_FIND);
  68. foundDebugDomain = true;
  69. }
  70. const domain = normalizeDomain(_domain);
  71. if (domain) {
  72. if (includeAllSubDomain) {
  73. domainSets.add(`.${domain}`);
  74. } else {
  75. domainSets.add(domain);
  76. }
  77. }
  78. });
  79. console.timeEnd(` - processHosts: ${hostsUrl}`);
  80. return [...domainSets];
  81. }
  82. /**
  83. * @param {string | URL} filterRulesUrl
  84. * @param {(string | URL)[] | undefined} fallbackUrls
  85. * @returns {Promise<{ white: Set<string>, black: Set<string>, foundDebugDomain: boolean }>}
  86. */
  87. async function processFilterRules (filterRulesUrl, fallbackUrls) {
  88. console.time(` - processFilterRules: ${filterRulesUrl}`);
  89. /** @type Set<string> */
  90. const whitelistDomainSets = new Set();
  91. /** @type Set<string> */
  92. const blacklistDomainSets = new Set();
  93. let filterRules;
  94. try {
  95. /** @type string[] */
  96. filterRules = (
  97. await Promise.any(
  98. [filterRulesUrl, ...(fallbackUrls || [])].map(
  99. async url => (await fetchWithRetry(url)).text()
  100. )
  101. )
  102. ).split('\n').map(line => line.trim());
  103. } catch (e) {
  104. console.log('Download Rule for [' + filterRulesUrl + '] failed');
  105. throw e;
  106. }
  107. for (let i = 0, len = filterRules.length; i < len; i++) {
  108. const line = filterRules[i];
  109. const lineStartsWithDoubleVerticalBar = line.startsWith('||');
  110. if (
  111. line === ''
  112. || line.includes('#')
  113. || line.includes('!')
  114. || line.includes('*')
  115. || line.includes('/')
  116. || line.includes('[')
  117. || line.includes('(')
  118. || line.includes('$') && !lineStartsWithDoubleVerticalBar
  119. ) {
  120. continue;
  121. }
  122. const lineEndsWithCaret = line.endsWith('^');
  123. const lineEndsWithCaretVerticalBar = line.endsWith('^|');
  124. if (lineStartsWithDoubleVerticalBar && line.endsWith('^$badfilter')) {
  125. const _domain = line.replace('||', '').replace('^$badfilter', '').trim();
  126. const domain = normalizeDomain(_domain);
  127. if (domain) {
  128. if (DEBUG_DOMAIN_TO_FIND && domain.includes(DEBUG_DOMAIN_TO_FIND)) {
  129. warnOnce(filterRulesUrl.toString(), true, DEBUG_DOMAIN_TO_FIND);
  130. foundDebugDomain = true;
  131. }
  132. whitelistDomainSets.add(domain);
  133. } else {
  134. console.warn(' * [parse-filter white] ' + _domain + ' is not a valid domain');
  135. }
  136. } else if (line.startsWith('@@||')
  137. && (
  138. lineEndsWithCaret
  139. || lineEndsWithCaretVerticalBar
  140. || line.endsWith('^$badfilter')
  141. || line.endsWith('^$1p')
  142. )
  143. ) {
  144. const _domain = line
  145. .replaceAll('@@||', '')
  146. .replaceAll('^$badfilter', '')
  147. .replaceAll('^$1p', '')
  148. .replaceAll('^|', '')
  149. .replaceAll('^', '')
  150. .trim();
  151. const domain = normalizeDomain(_domain);
  152. if (domain) {
  153. if (DEBUG_DOMAIN_TO_FIND && domain.includes(DEBUG_DOMAIN_TO_FIND)) {
  154. warnOnce(filterRulesUrl.toString(), true, DEBUG_DOMAIN_TO_FIND);
  155. foundDebugDomain = true;
  156. }
  157. whitelistDomainSets.add(domain);
  158. } else {
  159. console.warn(' * [parse-filter white] ' + _domain + ' is not a valid domain');
  160. }
  161. } else if (
  162. lineStartsWithDoubleVerticalBar
  163. && (
  164. lineEndsWithCaret
  165. || lineEndsWithCaretVerticalBar
  166. || line.endsWith('^$all')
  167. )
  168. ) {
  169. const _domain = line
  170. .replaceAll('||', '')
  171. .replaceAll('^|', '')
  172. .replaceAll('^$all', '')
  173. .replaceAll('^', '')
  174. .trim();
  175. const domain = normalizeDomain(_domain);
  176. if (domain) {
  177. if (DEBUG_DOMAIN_TO_FIND && domain.includes(DEBUG_DOMAIN_TO_FIND)) {
  178. warnOnce(filterRulesUrl.toString(), false, DEBUG_DOMAIN_TO_FIND);
  179. foundDebugDomain = true;
  180. }
  181. blacklistDomainSets.add(`.${domain}`);
  182. }
  183. } else if (
  184. line.startsWith('://')
  185. && (
  186. lineEndsWithCaret
  187. || lineEndsWithCaretVerticalBar
  188. )
  189. ) {
  190. const _domain = `${line.replaceAll('://', '').replaceAll('^|', '').replaceAll('^', '')}`.trim();
  191. const domain = normalizeDomain(_domain);
  192. if (domain) {
  193. if (DEBUG_DOMAIN_TO_FIND && domain.includes(DEBUG_DOMAIN_TO_FIND)) {
  194. warnOnce(filterRulesUrl.toString(), false, DEBUG_DOMAIN_TO_FIND);
  195. foundDebugDomain = true;
  196. }
  197. blacklistDomainSets.add(domain);
  198. }
  199. }
  200. }
  201. console.timeEnd(` - processFilterRules: ${filterRulesUrl}`);
  202. return {
  203. white: whitelistDomainSets,
  204. black: blacklistDomainSets,
  205. foundDebugDomain
  206. };
  207. }
  208. function preprocessFullDomainSetBeforeUsedAsWorkerData (data) {
  209. return data.filter(domain => (
  210. domain.charCodeAt(0) === 46
  211. ));
  212. }
  213. module.exports.processDomainLists = processDomainLists;
  214. module.exports.processHosts = processHosts;
  215. module.exports.processFilterRules = processFilterRules;
  216. module.exports.preprocessFullDomainSetBeforeUsedAsWorkerData = preprocessFullDomainSetBeforeUsedAsWorkerData;