Browse Source

Phishing domains trim `www`

SukkaW 1 year ago
parent
commit
ca9415ecc6

+ 1 - 1
Build/lib/get-phishing-domains.ts

@@ -208,7 +208,7 @@ const processPhihsingDomains = cache(function processPhihsingDomains(domainArr:
 });
 });
 
 
 const downloads = [
 const downloads = [
-  ...PHISHING_DOMAIN_LISTS_EXTRA.map(entry => processDomainListsWithPreload(...entry)),
+  ...PHISHING_DOMAIN_LISTS_EXTRA.map(entry => processDomainListsWithPreload(...entry, true)),
   ...PHISHING_HOSTS_EXTRA.map(entry => processHostsWithPreload(...entry))
   ...PHISHING_HOSTS_EXTRA.map(entry => processHostsWithPreload(...entry))
 ];
 ];
 
 

+ 18 - 0
Build/lib/normalize-domain.ts

@@ -24,6 +24,24 @@ export function fastNormalizeDomain(domain: string, parsed: TldTsParsed | null =
   return parsed.hostname;
   return parsed.hostname;
 }
 }
 
 
+export function fastNormalizeDomainIgnoreWww(domain: string, parsed: TldTsParsed | null = null) {
+  // tldts would otherwise run its own "extractHostname" on IP addresses, so bail out on IPs first.
+  // Because IPs are already rejected here, normalizeTldtsOpt.detectIp can safely be set to false.
+  if (isProbablyIpv4(domain) || isProbablyIpv6(domain)) {
+    return null;
+  }
+
+  parsed ??= tldts.parse(domain, normalizeTldtsOpt);
+
+  // Reject invalid domains that are neither ICANN nor private (things like .tor, .dn42, etc)
+  if (!parsed.isIcann && !parsed.isPrivate) return null;
+
+  if (parsed.subdomain === 'www') {
+    return parsed.domain;
+  }
+  return parsed.hostname;
+}
+
 export function normalizeDomain(domain: string, parsed: TldTsParsed | null = null) {
 export function normalizeDomain(domain: string, parsed: TldTsParsed | null = null) {
   if (domain.length === 0) return null;
   if (domain.length === 0) return null;
 
 

+ 22 - 18
Build/lib/parse-filter/domainlists.ts

@@ -1,36 +1,35 @@
-import picocolors from 'picocolors';
-import { fastNormalizeDomain } from '../normalize-domain';
+import { fastNormalizeDomain, fastNormalizeDomainIgnoreWww } from '../normalize-domain';
 import { processLine } from '../process-line';
 import { processLine } from '../process-line';
 import { onBlackFound } from './shared';
 import { onBlackFound } from './shared';
 import { fetchAssets } from '../fetch-assets';
 import { fetchAssets } from '../fetch-assets';
 import type { Span } from '../../trace';
 import type { Span } from '../../trace';
 
 
-function domainListLineCb(l: string, set: string[], includeAllSubDomain: boolean, meta: string) {
+function domainListLineCb(l: string, set: string[], includeAllSubDomain: boolean, meta: string, normalizeDomain = fastNormalizeDomain) {
   const line = processLine(l);
   const line = processLine(l);
   if (!line) return;
   if (!line) return;
 
 
-  const domain = fastNormalizeDomain(line);
-  if (!domain) return;
-  if (domain !== line) {
-    console.log(
-      picocolors.red('[process domain list]'),
-      picocolors.gray(`line: ${line}`),
-      picocolors.gray(`domain: ${domain}`),
-      picocolors.gray(meta)
-    );
-
+  const domain = normalizeDomain(line);
+  if (!domain) {
+    // console.log(
+    //   picocolors.red('[process domain list]'),
+    //   picocolors.gray(`line: ${line}`),
+    //   picocolors.gray(`domain: ${domain}`),
+    //   picocolors.gray(meta)
+    // );
     return;
     return;
   }
   }
 
 
   onBlackFound(domain, meta);
   onBlackFound(domain, meta);
 
 
-  set.push(includeAllSubDomain ? `.${line}` : line);
+  set.push(includeAllSubDomain ? `.${domain}` : domain);
 }
 }
 
 
 export function processDomainLists(
 export function processDomainLists(
   span: Span,
   span: Span,
-  domainListsUrl: string, mirrors: string[] | null, includeAllSubDomain = false
+  domainListsUrl: string, mirrors: string[] | null, includeAllSubDomain = false, wwwToApex = false
 ) {
 ) {
+  const domainNormalizer = wwwToApex ? fastNormalizeDomainIgnoreWww : fastNormalizeDomain;
+
   return span.traceChildAsync(`process domainlist: ${domainListsUrl}`, async (span) => {
   return span.traceChildAsync(`process domainlist: ${domainListsUrl}`, async (span) => {
     const text = await span.traceChildAsync('download', () => fetchAssets(
     const text = await span.traceChildAsync('download', () => fetchAssets(
       domainListsUrl,
       domainListsUrl,
@@ -41,7 +40,7 @@ export function processDomainLists(
 
 
     span.traceChildSync('parse domain list', () => {
     span.traceChildSync('parse domain list', () => {
       for (let i = 0, len = filterRules.length; i < len; i++) {
       for (let i = 0, len = filterRules.length; i < len; i++) {
-        domainListLineCb(filterRules[i], domainSets, includeAllSubDomain, domainListsUrl);
+        domainListLineCb(filterRules[i], domainSets, includeAllSubDomain, domainListsUrl, domainNormalizer);
       }
       }
     });
     });
 
 
@@ -49,7 +48,12 @@ export function processDomainLists(
   });
   });
 }
 }
 
 
-export function processDomainListsWithPreload(domainListsUrl: string, mirrors: string[] | null, includeAllSubDomain = false) {
+export function processDomainListsWithPreload(
+  domainListsUrl: string, mirrors: string[] | null,
+  includeAllSubDomain = false, wwwToApex = false
+) {
+  const domainNormalizer = wwwToApex ? fastNormalizeDomainIgnoreWww : fastNormalizeDomain;
+
   const downloadPromise = fetchAssets(domainListsUrl, mirrors);
   const downloadPromise = fetchAssets(domainListsUrl, mirrors);
 
 
   return (span: Span) => span.traceChildAsync(`process domainlist: ${domainListsUrl}`, async (span) => {
   return (span: Span) => span.traceChildAsync(`process domainlist: ${domainListsUrl}`, async (span) => {
@@ -59,7 +63,7 @@ export function processDomainListsWithPreload(domainListsUrl: string, mirrors: s
 
 
     span.traceChildSync('parse domain list', () => {
     span.traceChildSync('parse domain list', () => {
       for (let i = 0, len = filterRules.length; i < len; i++) {
       for (let i = 0, len = filterRules.length; i < len; i++) {
-        domainListLineCb(filterRules[i], domainSets, includeAllSubDomain, domainListsUrl);
+        domainListLineCb(filterRules[i], domainSets, includeAllSubDomain, domainListsUrl, domainNormalizer);
       }
       }
     });
     });