// parse-filter.js
  1. const { isIP } = require('net');
  2. const { fetch } = require('undici');
  /**
   * Validates a candidate domain name.
   *
   * Shape accepted by the pattern:
   * - zero or more leading labels, each not starting with `-`, optionally
   *   prefixed with `xn--`, ending in `[a-z0-9]` and followed by a dot
   *   (underscores are allowed inside these labels);
   * - one final label of `[a-z0-9-]`, optionally prefixed with `xn--`;
   * - a top-level label of at least two letters.
   *
   * There is no `i` flag, so only lowercase input matches. The pattern is
   * deliberately NOT multiline: `^` and `$` must anchor the whole string,
   * otherwise an input containing a stray line terminator (for example
   * "junk\rexample.com") would be accepted as a valid domain.
   */
  const rDomain = /^(((?!\-))(xn\-\-)?[a-z0-9\-_]{0,61}[a-z0-9]{1,1}\.)*(xn\-\-)?([a-z0-9\-]{1,61}|[a-z0-9\-]{1,30})\.[a-z]{2,}$/;
  /**
   * Debugging aid: when set to a string, the parsers in this file log every
   * parsed entry that contains it. `null` disables the logging.
   * @type {string | null}
   */
  const DEBUG_DOMAIN_TO_FIND = null; // example.com | null
  /**
   * Download a plain-text domain list and return its unique entries in
   * first-seen order.
   *
   * A line is ignored when it starts with `#`, `!`, a space or a carriage
   * return, or when nothing is left after trimming (so blank and
   * whitespace-only lines never produce an empty-string entry). Every other
   * line is trimmed and added as-is; no domain validation is applied here.
   *
   * @param {string | URL} domainListsUrl location of the list; a string is parsed with `new URL()`
   * @returns {Promise<string[]>} de-duplicated entries
   */
  async function processDomainLists (domainListsUrl) {
    const url = typeof domainListsUrl === 'string' ? new URL(domainListsUrl) : domainListsUrl;
    const response = await fetch(url);
    const text = await response.text();

    /** @type Set<string> */
    const domainSets = new Set();

    for (const line of text.split('\n')) {
      if (
        line.startsWith('#')
        || line.startsWith('!')
        || line.startsWith(' ')
        || line.startsWith('\r')
      ) {
        continue;
      }

      const domainToAdd = line.trim();
      // Covers empty lines as well as lines made only of tabs / "\t\r".
      if (domainToAdd === '') {
        continue;
      }

      if (DEBUG_DOMAIN_TO_FIND && domainToAdd.includes(DEBUG_DOMAIN_TO_FIND)) {
        console.log(DEBUG_DOMAIN_TO_FIND, 'found in domain list:', domainToAdd);
      }

      domainSets.add(domainToAdd);
    }

    return [...domainSets];
  }
  /**
   * Download a hosts-format file and return the unique, valid hostnames it
   * maps, in first-seen order.
   *
   * Each accepted line is split on runs of whitespace (spaces or tabs). The
   * first field (the address column) is discarded and every remaining field
   * is validated against `rDomain`, so tab-separated entries and lines that
   * list several hostnames are both handled.
   *
   * Ignored lines: any line containing `#` (including lines with a trailing
   * comment), lines starting with a space or carriage return, and blank
   * lines.
   *
   * @param {string | URL} hostsUrl location of the hosts file; a string is parsed with `new URL()`
   * @param {boolean} [includeAllSubDomain=false] when true every result is prefixed with `.`
   * @returns {Promise<string[]>} de-duplicated hostnames
   */
  async function processHosts (hostsUrl, includeAllSubDomain = false) {
    const url = typeof hostsUrl === 'string' ? new URL(hostsUrl) : hostsUrl;
    const response = await fetch(url);
    const text = await response.text();

    /** @type Set<string> */
    const domainSets = new Set();

    for (const line of text.split('\n')) {
      if (line.includes('#')) {
        continue;
      }
      if (line.startsWith(' ') || line.startsWith('\r') || line.trim() === '') {
        continue;
      }

      // Drop the address column; what remains are the hostnames of this entry.
      const [, ...hostnames] = line.trim().split(/\s+/);

      for (const domain of hostnames) {
        if (DEBUG_DOMAIN_TO_FIND && domain.includes(DEBUG_DOMAIN_TO_FIND)) {
          console.log(DEBUG_DOMAIN_TO_FIND, 'found in hosts:', url);
        }
        if (rDomain.test(domain)) {
          domainSets.add(includeAllSubDomain ? `.${domain}` : domain);
        }
      }
    }

    return [...domainSets];
  }
  /**
   * Download an adblock-style filter list and extract the plain domain rules
   * from it.
   *
   * Every line is trimmed first. A line is ignored when it is empty, contains
   * `#`, `!` or `*`, contains `/` anywhere other than in a leading `://`,
   * contains `$` without starting with `||`, or is a bare IP address.
   *
   * Remaining lines are classified as follows (first match wins); the
   * extracted domain must pass `rDomain` to be recorded:
   * - `||domain^$badfilter`                      -> white: `domain`
   * - `@@||domain^` / `@@||domain^|`             -> white: `domain`
   * - `||domain^` / `||domain^|` / `||domain^$all` -> black: `.domain`
   * - `://domain^` / `://domain^|`               -> black: `domain`
   *
   * @param {string | URL} filterRulesUrl location of the filter list; a string is parsed with `new URL()`
   * @returns {Promise<{ white: Set<string>, black: Set<string> }>}
   */
  async function processFilterRules (filterRulesUrl) {
    const url = typeof filterRulesUrl === 'string' ? new URL(filterRulesUrl) : filterRulesUrl;

    /** @type Set<string> */
    const whitelistDomainSets = new Set();
    /** @type Set<string> */
    const blacklistDomainSets = new Set();

    const response = await fetch(url);
    const text = await response.text();

    for (const rawLine of text.split('\n')) {
      const line = rawLine.trim();
      const lineStartsWithDoubleVerticalBar = line.startsWith('||');
      const lineStartsWithScheme = line.startsWith('://');
      // The "://" prefix itself must not count as a path separator, otherwise
      // the "://domain^" branch below could never be reached.
      const pathProbe = lineStartsWithScheme ? line.slice(3) : line;

      if (
        line === ''
        || line.includes('#')
        || line.includes('!')
        || line.includes('*')
        || pathProbe.includes('/')
        || (line.includes('$') && !lineStartsWithDoubleVerticalBar)
        || isIP(line) !== 0
      ) {
        continue;
      }

      const lineEndsWithCaret = line.endsWith('^');
      const lineEndsWithCaretVerticalBar = line.endsWith('^|');

      if (lineStartsWithDoubleVerticalBar && line.endsWith('^$badfilter')) {
        const domain = line.replace('||', '').replace('^$badfilter', '').trim();
        if (rDomain.test(domain)) {
          whitelistDomainSets.add(domain);
        }
      } else if (
        line.startsWith('@@||')
        && (
          lineEndsWithCaret
          || lineEndsWithCaretVerticalBar
          // NOTE(review): the two `$` endings below cannot currently match,
          // because any line containing `$` that does not start with `||` is
          // rejected by the guard above. Kept as-is; confirm the intended
          // handling of `@@||…^$badfilter` / `@@||…^$1p` before enabling.
          || line.endsWith('^$badfilter')
          || line.endsWith('^$1p')
        )
      ) {
        const domain = line
          .replaceAll('@@||', '')
          .replaceAll('^$badfilter', '')
          .replaceAll('^$1p', '')
          .replaceAll('^|', '')
          .replaceAll('^', '')
          .trim();
        if (rDomain.test(domain)) {
          whitelistDomainSets.add(domain);
        }
      } else if (
        lineStartsWithDoubleVerticalBar
        && (
          lineEndsWithCaret
          || lineEndsWithCaretVerticalBar
          || line.endsWith('^$all')
        )
      ) {
        const domain = line
          .replaceAll('||', '')
          .replaceAll('^|', '')
          .replaceAll('^$all', '')
          .replaceAll('^', '')
          .trim();
        if (rDomain.test(domain)) {
          if (DEBUG_DOMAIN_TO_FIND && domain.includes(DEBUG_DOMAIN_TO_FIND)) {
            console.log(DEBUG_DOMAIN_TO_FIND, 'found in filter list:', url);
          }
          // Leading dot: recorded with the same prefix processHosts uses
          // for its includeAllSubDomain mode.
          blacklistDomainSets.add(`.${domain}`);
        }
      } else if (
        lineStartsWithScheme
        && (lineEndsWithCaret || lineEndsWithCaretVerticalBar)
      ) {
        const domain = line
          .replaceAll('://', '')
          .replaceAll('^|', '')
          .replaceAll('^', '')
          .trim();
        if (rDomain.test(domain)) {
          if (DEBUG_DOMAIN_TO_FIND && domain.includes(DEBUG_DOMAIN_TO_FIND)) {
            console.log(DEBUG_DOMAIN_TO_FIND, 'found in filter list:', url);
          }
          blacklistDomainSets.add(domain);
        }
      }
    }

    return {
      white: whitelistDomainSets,
      black: blacklistDomainSets
    };
  }
  // Public API: the three list parsers, exposed as named CommonJS exports.
  Object.assign(module.exports, {
    processDomainLists,
    processHosts,
    processFilterRules
  });