// build-reject-domainset-worker.js
  1. const { workerData } = require('piscina');
  2. exports.dedupe = ({ chunk }) => {
  3. const outputToBeRemoved = new Set();
  4. for (let i = 0, l = chunk.length; i < l; i++) {
  5. const domainFromInput = chunk[i];
  6. for (const domainFromFullSet of workerData) {
  7. if (domainFromFullSet === domainFromInput) continue;
  8. if (domainFromFullSet.charAt(0) !== '.') continue;
  9. if (
  10. (domainFromInput.charAt(0) !== '.' && `.${domainFromInput}` === domainFromFullSet)
  11. || domainFromInput.endsWith(domainFromFullSet)
  12. ) {
  13. outputToBeRemoved.add(domainFromInput);
  14. break;
  15. }
  16. }
  17. }
  18. return outputToBeRemoved;
  19. };
  20. exports.whitelisted = ({ whiteList }) => {
  21. const outputToBeRemoved = new Set();
  22. for (const domain of workerData) {
  23. for (const white of whiteList) {
  24. if (domain.includes(white) || white.includes(domain)) {
  25. outputToBeRemoved.add(domain);
  26. break;
  27. }
  28. }
  29. }
  30. return outputToBeRemoved;
  31. };
  32. exports.dedupeKeywords = ({ keywords, suffixes }) => {
  33. const outputToBeRemoved = new Set();
  34. for (const domain of workerData) {
  35. for (const keyword of keywords) {
  36. if (domain.includes(keyword) || keyword.includes(domain)) {
  37. outputToBeRemoved.add(domain);
  38. break;
  39. }
  40. }
  41. for (const suffix of suffixes) {
  42. if (domain.endsWith(suffix)) {
  43. outputToBeRemoved.add(domain);
  44. break;
  45. }
  46. }
  47. }
  48. return outputToBeRemoved;
  49. }