| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081 |
// `workerData` is the value exported by Piscina under that name; the tasks
// below index it and iterate over it as a list of domain strings.
const { workerData } = require('piscina');

// Length is read once at module load and reused by the index loop in `dedupe`.
const len = workerData.length;
- exports.dedupe = ({ chunk }) => {
- const outputToBeRemoved = new Set();
- for (let i = 0, l = chunk.length; i < l; i++) {
- const domainFromInput = chunk[i];
- for (let j = 0; j < len; j++) {
- const domainFromFullSet = workerData[j];
- if (domainFromFullSet === domainFromInput) continue;
- if (domainFromFullSet.charCodeAt(0) !== 46) continue;
- // domainFromFullSet is now startsWith a "."
- if (domainFromInput.charCodeAt(0) !== 46) {
- let shouldBeRemoved = true;
- for (let k = 0, l2 = domainFromInput.length; k < l2; k++) {
- if (domainFromFullSet.charCodeAt(k + 1) !== domainFromInput.charCodeAt(k)) {
- shouldBeRemoved = false;
- break;
- }
- }
- if (shouldBeRemoved) {
- outputToBeRemoved.add(domainFromInput);
- break;
- }
- }
- // domainFromInput is now startsWith a "."
- if (domainFromInput.length >= domainFromFullSet.length) {
- if (domainFromInput.endsWith(domainFromFullSet)) {
- outputToBeRemoved.add(domainFromInput);
- break;
- }
- }
- }
- }
- return outputToBeRemoved;
- };
- exports.whitelisted = ({ whiteList }) => {
- const outputToBeRemoved = new Set();
- for (const domain of workerData) {
- for (const white of whiteList) {
- if (domain.includes(white) || white.includes(domain)) {
- outputToBeRemoved.add(domain);
- break;
- }
- }
- }
- return outputToBeRemoved;
- };
- exports.dedupeKeywords = ({ keywords, suffixes }) => {
- const outputToBeRemoved = new Set();
- for (const domain of workerData) {
- for (const keyword of keywords) {
- if (domain.includes(keyword) || keyword.includes(domain)) {
- outputToBeRemoved.add(domain);
- break;
- }
- }
- for (const suffix of suffixes) {
- if (domain.endsWith(suffix)) {
- outputToBeRemoved.add(domain);
- break;
- }
- }
- }
- return outputToBeRemoved;
- }
|