Browse Source

Reject hosts: drop the `www.` prefix

SukkaW 1 year ago
parent
commit
9790b40a72

+ 1 - 1
Build/lib/get-phishing-domains.ts

@@ -208,7 +208,7 @@ const processPhihsingDomains = cache(function processPhihsingDomains(domainArr:
 });
 
 const downloads = [
-  ...PHISHING_DOMAIN_LISTS_EXTRA.map(entry => processDomainListsWithPreload(...entry, true)),
+  ...PHISHING_DOMAIN_LISTS_EXTRA.map(entry => processDomainListsWithPreload(...entry)),
   ...PHISHING_HOSTS_EXTRA.map(entry => processHostsWithPreload(...entry))
 ];
 

+ 14 - 6
Build/lib/normalize-domain.ts

@@ -10,7 +10,7 @@ type TldTsParsed = ReturnType<typeof tldts.parse>;
 /**
  * Skipped the input non-empty check, the `domain` should not be empty.
  */
-export function fastNormalizeDomain(domain: string, parsed: TldTsParsed | null = null) {
+export function fastNormalizeDomainWithoutWww(domain: string, parsed: TldTsParsed | null = null) {
   // We don't want tldts to call its own "extractHostname" on ip, bail out ip first.
   // Now ip has been bailed out, we can safely set normalizeTldtsOpt.detectIp to false.
   if (isProbablyIpv4(domain) || isProbablyIpv6(domain)) {
@@ -21,10 +21,22 @@ export function fastNormalizeDomain(domain: string, parsed: TldTsParsed | null =
   // Private invalid domain (things like .tor, .dn42, etc)
   if (!parsed.isIcann && !parsed.isPrivate) return null;
 
+  if (parsed.subdomain) {
+    if (parsed.subdomain === 'www') {
+      return parsed.domain;
+    }
+    if (parsed.subdomain.startsWith('www.')) {
+      return parsed.subdomain.slice(4) + '.' + parsed.domain;
+    }
+  }
+
   return parsed.hostname;
 }
 
-export function fastNormalizeDomainIgnoreWww(domain: string, parsed: TldTsParsed | null = null) {
+/**
+ * Skipped the input non-empty check, the `domain` should not be empty.
+ */
+export function fastNormalizeDomain(domain: string, parsed: TldTsParsed | null = null) {
   // We don't want tldts to call its own "extractHostname" on ip, bail out ip first.
   // Now ip has been bailed out, we can safely set normalizeTldtsOpt.detectIp to false.
   if (isProbablyIpv4(domain) || isProbablyIpv6(domain)) {
@@ -32,13 +44,9 @@ export function fastNormalizeDomainIgnoreWww(domain: string, parsed: TldTsParsed
   }
 
   parsed ??= tldts.parse(domain, normalizeTldtsOpt);
-
   // Private invalid domain (things like .tor, .dn42, etc)
   if (!parsed.isIcann && !parsed.isPrivate) return null;
 
-  if (parsed.subdomain === 'www') {
-    return parsed.domain;
-  }
   return parsed.hostname;
 }
 

+ 5 - 8
Build/lib/parse-filter/domainlists.ts

@@ -1,4 +1,4 @@
-import { fastNormalizeDomain, fastNormalizeDomainIgnoreWww } from '../normalize-domain';
+import { fastNormalizeDomain, fastNormalizeDomainWithoutWww } from '../normalize-domain';
 import { processLine } from '../process-line';
 import { onBlackFound } from './shared';
 import { fetchAssets } from '../fetch-assets';
@@ -27,9 +27,8 @@ function domainListLineCbIncludeAllSubdomain(line: string, set: string[], meta:
 
 export function processDomainLists(
   span: Span,
-  domainListsUrl: string, mirrors: string[] | null, includeAllSubDomain = false, wwwToApex = false
+  domainListsUrl: string, mirrors: string[] | null, includeAllSubDomain = false
 ) {
-  const domainNormalizer = wwwToApex ? fastNormalizeDomainIgnoreWww : fastNormalizeDomain;
   const lineCb = includeAllSubDomain ? domainListLineCbIncludeAllSubdomain : domainListLineCb;
 
   return span.traceChildAsync(`process domainlist: ${domainListsUrl}`, async (span) => {
@@ -42,7 +41,7 @@ export function processDomainLists(
 
     span.traceChildSync('parse domain list', () => {
       for (let i = 0, len = filterRules.length; i < len; i++) {
-        lineCb(filterRules[i], domainSets, domainListsUrl, domainNormalizer);
+        lineCb(filterRules[i], domainSets, domainListsUrl, fastNormalizeDomainWithoutWww);
       }
     });
 
@@ -52,10 +51,8 @@ export function processDomainLists(
 
 export function processDomainListsWithPreload(
   domainListsUrl: string, mirrors: string[] | null,
-  includeAllSubDomain = false, wwwToApex = false
+  includeAllSubDomain = false
 ) {
-  const domainNormalizer = wwwToApex ? fastNormalizeDomainIgnoreWww : fastNormalizeDomain;
-
   const downloadPromise = fetchAssets(domainListsUrl, mirrors, true);
   const lineCb = includeAllSubDomain ? domainListLineCbIncludeAllSubdomain : domainListLineCb;
 
@@ -65,7 +62,7 @@ export function processDomainListsWithPreload(
 
     span.traceChildSync('parse domain list', () => {
       for (let i = 0, len = filterRules.length; i < len; i++) {
-        lineCb(filterRules[i], domainSets, domainListsUrl, domainNormalizer);
+        lineCb(filterRules[i], domainSets, domainListsUrl, fastNormalizeDomainWithoutWww);
       }
     });
 

+ 3 - 3
Build/lib/parse-filter/filters.ts

@@ -3,10 +3,10 @@ import type { Span } from '../../trace';
 import { fetchAssets } from '../fetch-assets';
 import { onBlackFound, onWhiteFound } from './shared';
 import { createRetrieKeywordFilter as createKeywordFilter } from 'foxts/retrie';
-import { fastNormalizeDomain } from '../normalize-domain';
 import { looseTldtsOpt } from '../../constants/loose-tldts-opt';
 import tldts from 'tldts-experimental';
 import { NetworkFilter } from '@ghostery/adblocker';
+import { fastNormalizeDomainWithoutWww } from '../normalize-domain';
 
 const enum ParseType {
   WhiteIncludeSubdomain = 0,
@@ -221,7 +221,7 @@ export function parse($line: string, result: [string, ParseType], includeThirdPa
       && filter.isPlain() // isPlain() === !isRegex()
       && (!filter.isFullRegex())
     ) {
-      const hostname = fastNormalizeDomain(filter.hostname);
+      const hostname = fastNormalizeDomainWithoutWww(filter.hostname);
       if (!hostname) {
         result[1] = ParseType.Null;
         return result;
@@ -436,7 +436,7 @@ export function parse($line: string, result: [string, ParseType], includeThirdPa
     return result;
   }
 
-  const domain = fastNormalizeDomain(sliced);
+  const domain = fastNormalizeDomainWithoutWww(sliced);
 
   if (domain && domain === sliced) {
     result[0] = domain;

+ 2 - 2
Build/lib/parse-filter/hosts.ts

@@ -1,6 +1,6 @@
 import type { Span } from '../../trace';
 import { fetchAssets } from '../fetch-assets';
-import { fastNormalizeDomain } from '../normalize-domain';
+import { fastNormalizeDomainWithoutWww } from '../normalize-domain';
 import { onBlackFound } from './shared';
 
 function hostsLineCb(line: string, set: string[], includeAllSubDomain: boolean, meta: string) {
@@ -8,7 +8,7 @@ function hostsLineCb(line: string, set: string[], includeAllSubDomain: boolean,
   if (!_domain) {
     return;
   }
-  const domain = fastNormalizeDomain(_domain);
+  const domain = fastNormalizeDomainWithoutWww(_domain);
   if (!domain) {
     return;
   }