| 1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677 |
- const { workerData } = require('piscina');
- exports.dedupe = ({ chunk }) => {
- const outputToBeRemoved = new Set();
- for (let i = 0, l = chunk.length; i < l; i++) {
- const domainFromInput = chunk[i];
- for (const domainFromFullSet of workerData) {
- if (outputToBeRemoved.has(domainFromFullSet)) continue;
- if (domainFromFullSet === domainFromInput) continue;
- if (domainFromFullSet.charAt(0) !== '.') continue;
- // domainFromFullSet is now startsWith a "."
- if (domainFromInput.charAt(0) !== '.') {
- let shouldBeRemoved = true;
- for (let j = 0, l2 = domainFromInput.length; j < l2; j++) {
- if (domainFromFullSet.charAt(j + 1) !== domainFromInput.charAt(j)) {
- shouldBeRemoved = false;
- break;
- }
- }
- if (shouldBeRemoved) {
- outputToBeRemoved.add(domainFromInput);
- break;
- }
- }
- // domainFromInput is now startsWith a "."
- if (domainFromInput.length >= domainFromFullSet.length) {
- if (domainFromInput.endsWith(domainFromFullSet)) {
- outputToBeRemoved.add(domainFromInput);
- break;
- }
- }
- }
- }
- return outputToBeRemoved;
- };
- exports.whitelisted = ({ whiteList }) => {
- const outputToBeRemoved = new Set();
- for (const domain of workerData) {
- for (const white of whiteList) {
- if (domain.includes(white) || white.includes(domain)) {
- outputToBeRemoved.add(domain);
- break;
- }
- }
- }
- return outputToBeRemoved;
- };
- exports.dedupeKeywords = ({ keywords, suffixes }) => {
- const outputToBeRemoved = new Set();
- for (const domain of workerData) {
- for (const keyword of keywords) {
- if (domain.includes(keyword) || keyword.includes(domain)) {
- outputToBeRemoved.add(domain);
- break;
- }
- }
- for (const suffix of suffixes) {
- if (domain.endsWith(suffix)) {
- outputToBeRemoved.add(domain);
- break;
- }
- }
- }
- return outputToBeRemoved;
- }
|