build-reject-domainset-worker.js 1.3 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758
  1. const { workerData } = require('piscina');
  2. exports.dedupe = ({ chunk }) => {
  3. const outputToBeRemoved = new Set();
  4. for (const domainFromInput of chunk) {
  5. for (const domainFromFullSet of workerData) {
  6. if (domainFromFullSet === domainFromInput) continue;
  7. if (domainFromFullSet.charAt(0) !== '.') continue;
  8. if (
  9. `.${domainFromInput}` === domainFromFullSet
  10. || domainFromInput.endsWith(domainFromFullSet)
  11. ) {
  12. outputToBeRemoved.add(domainFromInput);
  13. break;
  14. }
  15. }
  16. }
  17. return outputToBeRemoved;
  18. };
  19. exports.whitelisted = ({ whiteList }) => {
  20. const outputToBeRemoved = new Set();
  21. for (const domain of workerData) {
  22. for (const white of whiteList) {
  23. if (domain.includes(white) || white.includes(domain)) {
  24. outputToBeRemoved.add(domain);
  25. break;
  26. }
  27. }
  28. }
  29. return outputToBeRemoved;
  30. };
  31. exports.dedupeKeywords = ({ keywords, suffixes }) => {
  32. const outputToBeRemoved = new Set();
  33. for (const domain of workerData) {
  34. for (const keyword of keywords) {
  35. if (domain.includes(keyword) || keyword.includes(domain)) {
  36. outputToBeRemoved.add(domain);
  37. break;
  38. }
  39. }
  40. for (const suffix of suffixes) {
  41. if (domain.endsWith(suffix)) {
  42. outputToBeRemoved.add(domain);
  43. break;
  44. }
  45. }
  46. }
  47. return outputToBeRemoved;
  48. }