diff --git a/.eslintrc.json b/.eslintrc.json index aa06a5c4c..946c5addb 100644 --- a/.eslintrc.json +++ b/.eslintrc.json @@ -1,11 +1,21 @@ { "root": true, "extends": ["sukka/node"], - "rules": { - "no-console": "off" - }, - "parserOptions": { - "ecmaVersion": "latest", - "sourceType": "module" - } + "ignorePatterns": [ + "node_modules/", + // disable for now + "**/*.d.ts" + ], + "overrides": [ + { + "files": ["**/*.js"], + "rules": { + "no-console": "off" + }, + "parserOptions": { + "ecmaVersion": "latest", + "sourceType": "module" + } + } + ] } diff --git a/Build/build-cdn-conf.js b/Build/build-cdn-conf.js index 2f02df6b9..d5bf83347 100644 --- a/Build/build-cdn-conf.js +++ b/Build/build-cdn-conf.js @@ -5,11 +5,22 @@ const { minifyRules } = require('./lib/minify-rules'); const { fetchRemoteTextAndCreateReadlineInterface, readFileByLine } = require('./lib/fetch-remote-text-by-line'); const Trie = require('./lib/trie'); const { runner } = require('./lib/trace-runner'); +const fs = require('fs'); + +const publicSuffixPath = path.resolve(__dirname, '../node_modules/.cache/public_suffix-list_dat.txt'); runner(__filename, async () => { const trie = new Trie(); - for await (const line of await fetchRemoteTextAndCreateReadlineInterface('https://publicsuffix.org/list/public_suffix_list.dat')) { - trie.add(line); + + if (fs.existsSync(publicSuffixPath)) { + for await (const line of readFileByLine(publicSuffixPath)) { + trie.add(line); + } + } else { + console.log('public_suffix_list.dat not found, fetch directly from remote.'); + for await (const line of await fetchRemoteTextAndCreateReadlineInterface('https://publicsuffix.org/list/public_suffix_list.dat')) { + trie.add(line); + } } /** @@ -18,13 +29,16 @@ runner(__filename, async () => { */ const S3OSSDomains = new Set(); - trie.find('.amazonaws.com') - .filter(line => (line.startsWith('s3-') || line.startsWith('s3.')) && !line.includes('cn-')) - .forEach(line => S3OSSDomains.add(line)); - - trie.find('.scw.cloud') - .filter(line => (line.startsWith('s3-') || line.startsWith('s3.')) && !line.includes('cn-')) - .forEach(line => S3OSSDomains.add(line)); + trie.find('.amazonaws.com').forEach(line => { + if ((line.startsWith('s3-') || line.startsWith('s3.')) && !line.includes('cn-')) { + S3OSSDomains.add(line); + } + }); + trie.find('.scw.cloud').forEach(line => { + if ((line.startsWith('s3-') || line.startsWith('s3.')) && !line.includes('cn-')) { + S3OSSDomains.add(line); + } + }); /** @type {string[]} */ const cdnDomainsList = []; @@ -45,7 +59,7 @@ runner(__filename, async () => { ]; const ruleset = minifyRules(cdnDomainsList); - await Promise.all(createRuleset( + return Promise.all(createRuleset( 'Sukka\'s Ruleset - CDN Domains', description, new Date(), diff --git a/Build/build-domestic-ruleset.js b/Build/build-domestic-ruleset.js index d6dec47b2..670f743e3 100644 --- a/Build/build-domestic-ruleset.js +++ b/Build/build-domestic-ruleset.js @@ -19,8 +19,15 @@ runner(__filename, async () => { results.push( ...Object.entries(DOMESTICS) - .filter(([key]) => key !== 'SYSTEM') - .flatMap(([, { domains }]) => domains) + .reduce( + (acc, [key, { domains }]) => { + if (key === 'SYSTEM') { + return acc; + } + return [...acc, ...domains]; + }, + /** @type {string[]} */([]) + ) .sort(domainSorter) .map((domain) => `DOMAIN-SUFFIX,${domain}`) ); diff --git a/Build/build-internal-cdn-rules.js b/Build/build-internal-cdn-rules.js index 842f85b0c..1a0f6b36b 100644 --- a/Build/build-internal-cdn-rules.js +++ b/Build/build-internal-cdn-rules.js @@ -1,7 +1,6 @@ // @ts-check const fse = require('fs-extra'); const path = require('path'); -const { isDomainLoose } = require('./lib/is-domain-loose'); const tldts = require('tldts'); const { processLine } = require('./lib/process-line'); const { readFileByLine } = require('./lib/fetch-remote-text-by-line'); @@ -35,11 +34,15 @@ runner(__filename, async () => { */ const processLocalDomainSet = async (domainSetPath) => { for await (const line of readFileByLine(domainSetPath)) { - if (line[0] === '.') { - addApexDomain(line.slice(1)); - } else if (isDomainLoose(line)) { - addApexDomain(line); - } else if (processLine(line)) { + const parsed = tldts.parse(line, { allowPrivateDomains: true }); + if (!parsed.isIp && (parsed.isIcann || parsed.isPrivate)) { + if (parsed.domain) { + set.add(parsed.domain); + } + continue; + } + + if (processLine(line)) { console.warn('[drop line from domainset]', line); } } diff --git a/Build/build-internal-chn-domains.js b/Build/build-internal-chn-domains.js index 59c3418e0..440f15053 100644 --- a/Build/build-internal-chn-domains.js +++ b/Build/build-internal-chn-domains.js @@ -1,9 +1,9 @@ // @ts-check const path = require('path'); const fse = require('fs-extra'); -const fs = require('fs'); const { parseFelixDnsmasq } = require('./lib/parse-dnsmasq'); const { runner } = require('./lib/trace-runner'); +const { compareAndWriteFile } = require('./lib/create-file'); runner(__filename, async () => { const [result] = await Promise.all([ @@ -11,8 +11,8 @@ runner(__filename, async () => { fse.ensureDir(path.resolve(__dirname, '../List/internal')) ]); - await fs.promises.writeFile( - path.resolve(__dirname, '../List/internal/accelerated-china-domains.txt'), - `${result.map(line => `SUFFIX,${line}`).join('\n')}\n` + await compareAndWriteFile( + result.map(line => `SUFFIX,${line}`), + path.resolve(__dirname, '../List/internal/accelerated-china-domains.txt') ); }); diff --git a/Build/build-phishing-domainset.js b/Build/build-phishing-domainset.js index 6e7524d26..53b0c681f 100644 --- a/Build/build-phishing-domainset.js +++ b/Build/build-phishing-domainset.js @@ -1,10 +1,10 @@ -const { parse } = require('tldts'); +const tldts = require('tldts'); const { processFilterRules } = require('./lib/parse-filter.js'); const path = require('path'); const { createRuleset } = require('./lib/create-file'); const { processLine } = require('./lib/process-line.js'); const domainSorter = require('./lib/stable-sort-domain'); -const { runner } = require('./lib/trace-runner.js'); +const { runner, traceSync } = require('./lib/trace-runner.js'); const WHITELIST_DOMAIN = new Set([ 'w3s.link', @@ -61,19 +61,14 @@ const BLACK_TLD = new Set([ ]); runner(__filename, async () => { - const domainSet = Array.from( - (await processFilterRules('https://curbengh.github.io/phishing-filter/phishing-filter-agh.txt')).black - ); + const domainSet = Array.from((await processFilterRules('https://curbengh.github.io/phishing-filter/phishing-filter-agh.txt')).black); const domainCountMap = {}; for (let i = 0, len = domainSet.length; i < len; i++) { const line = processLine(domainSet[i]); if (!line) continue; - const domain = line.charCodeAt(0) === 46 ? line.slice(1) : line; - - const parsed = parse(domain, { allowPrivateDomains: true }); - + const parsed = tldts.parse(line, { allowPrivateDomains: true }); const apexDomain = parsed.domain; if (apexDomain) { @@ -84,19 +79,18 @@ runner(__filename, async () => { domainCountMap[apexDomain] ||= 0; let isPhishingDomainMockingAmazon = false; - - if (domain.startsWith('amaz')) { + if (line.startsWith('.amaz')) { domainCountMap[apexDomain] += 0.5; isPhishingDomainMockingAmazon = true; - if (domain.startsWith('amazon-')) { + if (line.startsWith('.amazon-')) { domainCountMap[apexDomain] += 4.5; } - } else if (domain.startsWith('customer')) { + } else if (line.startsWith('.customer')) { domainCountMap[apexDomain] += 0.25; } - if (domain.includes('-co-jp')) { + if (line.includes('-co-jp')) { domainCountMap[apexDomain] += (isPhishingDomainMockingAmazon ? 4.5 : 0.5); } @@ -105,17 +99,17 @@ runner(__filename, async () => { domainCountMap[apexDomain] += 1; - if (domain.length > 19) { + if (line.length > 19) { // Add more weight if the domain is long enough - if (domain.length > 44) { + if (line.length > 44) { domainCountMap[apexDomain] += 3.5; - } else if (domain.length > 34) { + } else if (line.length > 34) { domainCountMap[apexDomain] += 2.5; - } else if (domain.length > 29) { + } else if (line.length > 29) { domainCountMap[apexDomain] += 1.5; - } else if (domain.length > 24) { + } else if (line.length > 24) { domainCountMap[apexDomain] += 0.75; - } else if (domain.length > 19) { + } else if (line.length > 19) { domainCountMap[apexDomain] += 0.25; } @@ -129,15 +123,14 @@ runner(__filename, async () => { } } - const results = []; - - Object.entries(domainCountMap).forEach(([domain, count]) => { - if (count >= 5) { - results.push(`.${domain}`); - } - }); - - results.sort(domainSorter); + const results = traceSync('* get final results', () => Object.entries(domainCountMap) + .reduce((acc, [apexDomain, count]) => { + if (count >= 5) { + acc.push(`.${apexDomain}`); + } + return acc; + }, /** @type {string[]} */([])) + .sort(domainSorter)); const description = [ 'License: AGPL 3.0', diff --git a/Build/build-reject-domainset.js b/Build/build-reject-domainset.js index b7a183268..2b5988821 100644 --- a/Build/build-reject-domainset.js +++ b/Build/build-reject-domainset.js @@ -1,20 +1,20 @@ // @ts-check -const fs = require('fs'); const fse = require('fs-extra'); const { resolve: pathResolve } = require('path'); -const tldts = require('tldts'); - const { processHosts, processFilterRules } = require('./lib/parse-filter'); const Trie = require('./lib/trie'); const { HOSTS, ADGUARD_FILTERS, PREDEFINED_WHITELIST, PREDEFINED_ENFORCED_BACKLIST } = require('./lib/reject-data-source'); -const { createRuleset } = require('./lib/create-file'); +const { createRuleset, compareAndWriteFile } = require('./lib/create-file'); const { processLine } = require('./lib/process-line'); const { domainDeduper } = require('./lib/domain-deduper'); const createKeywordFilter = require('./lib/aho-corasick'); const { readFileByLine } = require('./lib/fetch-remote-text-by-line'); -const domainSorter = require('./lib/stable-sort-domain'); +const { createDomainSorter } = require('./lib/stable-sort-domain'); +const { traceSync, runner } = require('./lib/trace-runner'); +const { getGorhillPublicSuffixPromise } = require('./lib/get-gorhill-publicsuffix'); +const { createCachedGorhillGetDomain } = require('./lib/cached-tld-parse'); /** Whitelists */ const filterRuleWhitelistDomainSets = new Set(PREDEFINED_WHITELIST); @@ -22,7 +22,8 @@ const filterRuleWhitelistDomainSets = new Set(PREDEFINED_WHITELIST); const domainKeywordsSet = new Set(); /** @type {Set} Dedupe domains included by DOMAIN-SUFFIX */ const domainSuffixSet = new Set(); -(async () => { + +runner(__filename, async () => { /** @type Set */ const domainSets = new Set(); @@ -31,7 +32,8 @@ const domainSuffixSet = new Set(); let shouldStop = false; - await Promise.all([ + const [gorhill] = await Promise.all([ + getGorhillPublicSuffixPromise, // Parse from remote hosts & domain lists ...HOSTS.map(entry => processHosts(entry[0], entry[1]).then(hosts => { hosts.forEach(host => { @@ -129,7 +131,7 @@ const domainSuffixSet = new Set(); console.log(`Start deduping from black keywords/suffixes! (${previousSize})`); console.time('* Dedupe from black keywords/suffixes'); - const kwfilter = createKeywordFilter(Array.from(domainKeywordsSet)); + const kwfilter = createKeywordFilter(domainKeywordsSet); const trie1 = Trie.from(domainSets); domainSuffixSet.forEach(suffix => { @@ -167,19 +169,35 @@ const domainSuffixSet = new Set(); const START_TIME = Date.now(); - const dudupedDominArray = domainDeduper(Array.from(domainSets)); + const dudupedDominArray = traceSync('* Dedupe from covered subdomain', () => domainDeduper(Array.from(domainSets))); console.log(`* Dedupe from covered subdomain - ${(Date.now() - START_TIME) / 1000}s`); console.log(`Deduped ${previousSize - dudupedDominArray.length} rules!`); - /** @type {Record} */ - const rejectDomainsStats = dudupedDominArray.reduce((acc, cur) => { - const suffix = tldts.getDomain(cur, { allowPrivateDomains: false }); - if (suffix) { - acc[suffix] = (acc[suffix] ?? 0) + 1; - } - return acc; - }, {}); + // Create reject stats + const getDomain = createCachedGorhillGetDomain(gorhill); + /** @type {[string, number][]} */ + const rejectDomainsStats = traceSync( + '* Collect reject domain stats', + () => Object.entries( + dudupedDominArray.reduce((acc, cur) => { + const suffix = getDomain(cur); + if (suffix) { + acc[suffix] = (acc[suffix] ?? 0) + 1; + } + return acc; + }, {}) + ).filter(a => a[1] > 2).sort((a, b) => { + const t = b[1] - a[1]; + if (t === 0) { + return a[0].localeCompare(b[0]); + } + return t; + }) + ); + + const domainSorter = createDomainSorter(gorhill); + const domainset = traceSync('* Sort reject domainset', () => dudupedDominArray.sort(domainSorter)); const description = [ 'License: AGPL 3.0', @@ -192,7 +210,6 @@ const domainSuffixSet = new Set(); ...HOSTS.map(host => ` - ${host[0]}`), ...ADGUARD_FILTERS.map(filter => ` - ${Array.isArray(filter) ? filter[0] : filter}`) ]; - const domainset = dudupedDominArray.sort(domainSorter); await Promise.all([ ...createRuleset( @@ -204,21 +221,11 @@ const domainSuffixSet = new Set(); pathResolve(__dirname, '../List/domainset/reject.conf'), pathResolve(__dirname, '../Clash/domainset/reject.txt') ), - fs.promises.writeFile( - pathResolve(__dirname, '../List/internal/reject-stats.txt'), - Object.entries(rejectDomainsStats) - .filter(a => a[1] > 1) - .sort((a, b) => { - const t = b[1] - a[1]; - if (t === 0) { - return a[0].localeCompare(b[0]); - } - return t; - }) - .map(([domain, count]) => `${domain}${' '.repeat(100 - domain.length)}${count}`) - .join('\n') + compareAndWriteFile( + rejectDomainsStats.map(([domain, count]) => `${domain}${' '.repeat(100 - domain.length)}${count}`), + pathResolve(__dirname, '../List/internal/reject-stats.txt') ), // Copy reject_sukka.conf for backward compatibility fse.copy(pathResolve(__dirname, '../Source/domainset/reject_sukka.conf'), pathResolve(__dirname, '../List/domainset/reject_sukka.conf')) ]); -})(); +}); diff --git a/Build/build.js b/Build/build.js index 9c076865c..15bdafe56 100644 --- a/Build/build.js +++ b/Build/build.js @@ -42,7 +42,7 @@ runner(__filename, async () => { } } - await Promise.all(promises); + return Promise.all(promises); }); /** diff --git a/Build/download-previous-build.js b/Build/download-previous-build.js index 5583b6c6a..138a6cf43 100644 --- a/Build/download-previous-build.js +++ b/Build/download-previous-build.js @@ -16,7 +16,7 @@ const fileExists = (path) => { .catch(() => false); }; -runner(__filename, async () => { +const downloadPreviousBuild = async () => { const filesList = ['Clash', 'List']; let allFileExists = true; @@ -79,4 +79,28 @@ runner(__filename, async () => { })); await fs.promises.unlink(extractedPath).catch(() => { }); +}; + +const downloadPublicSuffixList = async () => { + const publicSuffixDir = resolve(__dirname, '../node_modules/.cache'); + const publicSuffixPath = join(publicSuffixDir, 'public_suffix-list_dat.txt'); + + console.log('Download public suffix list.'); + + const [resp] = await Promise.all([ + fetch('https://publicsuffix.org/list/public_suffix_list.dat'), + fse.ensureDir(publicSuffixDir) + ]); + + await pipeline( + Readable.fromWeb(resp.body), + fs.createWriteStream(publicSuffixPath) + ); +}; + +runner(__filename, () => { + return Promise.all([ + downloadPreviousBuild(), + downloadPublicSuffixList() + ]); }); diff --git a/Build/lib/aho-corasick.js b/Build/lib/aho-corasick.js index 5ec4fdaff..b527ffc20 100644 --- a/Build/lib/aho-corasick.js +++ b/Build/lib/aho-corasick.js @@ -23,7 +23,7 @@ const createNode = (key, depth = 0) => ({ }); /** - * @param {string[]} keys + * @param {string[] | Set} keys */ const createKeywordFilter = (keys) => { const root = createNode('root'); @@ -39,16 +39,18 @@ const createKeywordFilter = (keys) => { const map = beginNode.children; // eslint-disable-next-line guard-for-in -- plain object for (const key in beginNode.children) { - const node = map[key]; + const node = map?.[key]; let failNode = beginNode.fail; - while (failNode && !failNode.children[key]) { + while (failNode && !failNode.children?.[key]) { failNode = failNode.fail; } - node.fail = failNode?.children[key] || root; + if (node) { + node.fail = failNode?.children?.[key] || root; - queue.push(node); + queue.push(node); + } } idx++; @@ -83,10 +85,9 @@ const createKeywordFilter = (keys) => { } }; - for (let idx = 0, len = keys.length; idx < len; idx++) { - const key = keys[idx]; - put(key, key.length); - } + keys.forEach(k => { + put(k, k.length); + }); build(); diff --git a/Build/lib/cache-apply.js b/Build/lib/cache-apply.js new file mode 100644 index 000000000..012f02a75 --- /dev/null +++ b/Build/lib/cache-apply.js @@ -0,0 +1,47 @@ +/** + * @param {string} [namespace] + */ +const createCache = (namespace, printStats = false) => { + const cache = new Map(); + + let hit = 0; + if (namespace && printStats) { + process.on('exit', () => { + console.log(`🔋 [cache] ${namespace} hit: ${hit}, size: ${cache.size}`); + }); + } + + return { + /** + * @template T + * @param {string} key + * @param {() => T} fn + * @returns {T} + */ + sync(key, fn) { + if (cache.has(key)) { + hit++; + return cache.get(key); + } + const value = fn(); + cache.set(key, value); + return value; + }, + /** + * @template T + * @param {string} key + * @param {() => Promise} fn + * @returns {Promise} + */ + async async(key, fn) { + if (cache.has(key)) { + hit++; + return cache.get(key); + } + const value = await fn(); + cache.set(key, value); + return value; + } + }; +}; +module.exports.createCache = createCache; diff --git a/Build/lib/cached-tld-parse.js b/Build/lib/cached-tld-parse.js new file mode 100644 index 000000000..89d868608 --- /dev/null +++ b/Build/lib/cached-tld-parse.js @@ -0,0 +1,25 @@ +const tldts = require('tldts'); +const { createCache } = require('./cache-apply'); + +const cache = createCache('cached-tld-parse', true); + +const sharedConfig = { allowPrivateDomains: true }; + +/** + * @param {string} domain + * @returns {ReturnType} + */ +module.exports.parse = (domain) => { + return cache.sync(domain, () => tldts.parse(domain, sharedConfig)); +}; + +const gothillGetDomainCache = createCache('cached-gorhill-get-domain', true); +/** + * @param {import('gorhill-publicsuffixlist').default | null} gorhill + */ +module.exports.createCachedGorhillGetDomain = (gorhill) => { + /** + * @param {string} domain + */ + return (domain) => gothillGetDomainCache.sync(domain, () => gorhill.getDomain(domain[0] === '.' ? domain.slice(1) : domain)); +}; diff --git a/Build/lib/create-file.js b/Build/lib/create-file.js index 0ee7a11a6..6be013e4e 100644 --- a/Build/lib/create-file.js +++ b/Build/lib/create-file.js @@ -32,10 +32,11 @@ async function compareAndWriteFile(linesA, filePath) { const stream = fs.createWriteStream(filePath, { encoding: 'utf-8' }); for (let i = 0, len = linesA.length; i < len; i++) { - // eslint-disable-next-line no-await-in-loop -- backpressure - await writeToStream(stream, linesA[i]); - // eslint-disable-next-line no-await-in-loop -- backpressure - await writeToStream(stream, '\n'); + const p = writeToStream(stream, `${linesA[i]}\n`); + if (p) { + // eslint-disable-next-line no-await-in-loop -- backpressure, besides we only wait for drain + await p; + } } stream.end(); } else { @@ -48,13 +49,13 @@ module.exports.compareAndWriteFile = compareAndWriteFile; * @param {import('fs').WriteStream} stream * @param {string} data */ -async function writeToStream(stream, data) { +function writeToStream(stream, data) { if (!stream.write(data)) { return /** @type {Promise} */(new Promise((resolve) => { - stream.once('drain', () => { resolve(); }); + stream.once('drain', resolve); })); } - return Promise.resolve(); + return null; } /** diff --git a/Build/lib/domain-deduper.js b/Build/lib/domain-deduper.js index 3ead13336..4b24234dc 100644 --- a/Build/lib/domain-deduper.js +++ b/Build/lib/domain-deduper.js @@ -1,3 +1,4 @@ +// @ts-check const Trie = require('./trie'); /** @@ -13,8 +14,10 @@ const domainDeduper = (inputDomains) => { continue; } + // delete all included subdomains (ends with `.example.com`) trie.find(d, false).forEach(f => sets.delete(f)); + // if `.example.com` exists, then `example.com` should also be removed const a = d.slice(1); if (trie.has(a)) { sets.delete(a); diff --git a/Build/lib/fetch-remote-text-by-line.js b/Build/lib/fetch-remote-text-by-line.js index f4fe31854..17be31f01 100644 --- a/Build/lib/fetch-remote-text-by-line.js +++ b/Build/lib/fetch-remote-text-by-line.js @@ -34,7 +34,7 @@ module.exports.createReadlineInterfaceFromResponse = createReadlineInterfaceFrom /** * @param {import('undici').RequestInfo} url - * @param {import('undici').RequestInit | undefined} [opt] + * @param {import('undici').RequestInit} [opt] */ module.exports.fetchRemoteTextAndCreateReadlineInterface = async (url, opt) => { const resp = await fetchWithRetry(url, opt); diff --git a/Build/lib/fetch-retry.js b/Build/lib/fetch-retry.js index b7c10333a..e72e3a4fd 100644 --- a/Build/lib/fetch-retry.js +++ b/Build/lib/fetch-retry.js @@ -1,4 +1,11 @@ // @ts-check -const { fetch } = require('undici'); -const fetchWithRetry = /** @type {fetch} */(require('@vercel/fetch-retry')(fetch)); +const undici = require('undici'); + +// Enable HTTP/2 supports +undici.setGlobalDispatcher(new undici.Agent({ + allowH2: true, + pipelining: 10 +})); + +const fetchWithRetry = /** @type {import('undici').fetch} */(require('@vercel/fetch-retry')(undici.fetch)); module.exports.fetchWithRetry = fetchWithRetry; diff --git a/Build/lib/get-gorhill-publicsuffix.js b/Build/lib/get-gorhill-publicsuffix.js new file mode 100644 index 000000000..1787a32c4 --- /dev/null +++ b/Build/lib/get-gorhill-publicsuffix.js @@ -0,0 +1,34 @@ +const { toASCII } = require('punycode/'); +const fs = require('fs'); +const path = require('path'); + +const publicSuffixPath = path.resolve(__dirname, '../../node_modules/.cache/public_suffix-list_dat.txt'); +const getPublicSuffixListDat = () => { + if (fs.existsSync(publicSuffixPath)) { + return fs.promises.readFile(publicSuffixPath, 'utf-8'); + } + console.log('public_suffix_list.dat not found, fetch directly from remote.'); + return fetch('https://publicsuffix.org/list/public_suffix_list.dat').then(r => r.text()); +}; + +const getGorhillPublicSuffix = async () => { + const customFetch = async (url) => { + const buf = await fs.promises.readFile(url); + return { + arrayBuffer() { return Promise.resolve(buf.buffer); } + }; + }; + + const [publicSuffixListDat, { default: gorhill }] = await Promise.all([ + getPublicSuffixListDat(), + import('gorhill-publicsuffixlist') + ]); + + gorhill.parse(publicSuffixListDat, toASCII); + await gorhill.enableWASM({ customFetch }); + + return gorhill; +}; + +const getGorhillPublicSuffixPromise = getGorhillPublicSuffix(); +module.exports.getGorhillPublicSuffixPromise = getGorhillPublicSuffixPromise; diff --git a/Build/lib/is-domain-loose.js b/Build/lib/is-domain-loose.js index 665fdec6e..0b72271d4 100644 --- a/Build/lib/is-domain-loose.js +++ b/Build/lib/is-domain-loose.js @@ -1,13 +1,14 @@ // @ts-check -const { parse } = require('tldts'); +const tldts = require('./cached-tld-parse'); /** * @param {string} domain */ module.exports.isDomainLoose = (domain) => { - const { isIcann, isPrivate, isIp } = parse(domain, { allowPrivateDomains: true }); + const { isIcann, isPrivate, isIp } = tldts.parse(domain); return !!(!isIp && (isIcann || isPrivate)); }; + /** * @param {string} domain */ @@ -16,12 +17,15 @@ module.exports.normalizeDomain = (domain) => { return null; } - const { isIcann, isPrivate, hostname, isIp } = parse(domain, { allowPrivateDomains: true }); + const { isIcann, isPrivate, hostname, isIp } = tldts.parse(domain); if (isIp) { return null; } if (isIcann || isPrivate) { + if (hostname?.[0] === '.') { + return hostname.slice(1); + } return hostname; } diff --git a/Build/lib/parse-filter.js b/Build/lib/parse-filter.js index 19d0e5449..70ca4af95 100644 --- a/Build/lib/parse-filter.js +++ b/Build/lib/parse-filter.js @@ -179,19 +179,15 @@ async function processFilterRules(filterRulesUrl, fallbackUrls, includeThirdPart const downloadStart = performance.now(); try { const controller = new AbortController(); - const signal = controller.signal; /** @type string[] */ filterRules = ( await Promise.any( - [filterRulesUrl, ...(fallbackUrls || [])].map( - url => fetchWithRetry(url, { signal }) - .then(r => r.text()) - .then(text => { - controller.abort(); - return text; - }) - ) + [filterRulesUrl, ...(fallbackUrls || [])].map(async url => { + const text = await fetchWithRetry(url, { signal: controller.signal }).then(r => r.text()); + controller.abort(); + return text; + }) ) ).split('\n').map(line => line.trim()); } catch (e) { @@ -317,10 +313,7 @@ function parse($line, includeThirdParties) { if ( // (line.startsWith('@@|') || line.startsWith('@@.')) - ( - line[2] === '|' - || line[2] === '.' - ) + (line[2] === '|' || line[2] === '.') && ( lineEndsWithCaret || lineEndsWithCaretVerticalBar @@ -374,7 +367,7 @@ function parse($line, includeThirdParties) { return null; } - const lineStartsWithSingleDot = line.startsWith('.'); + const lineStartsWithSingleDot = line[0] === '.'; if ( lineStartsWithSingleDot && ( @@ -437,7 +430,7 @@ function parse($line, includeThirdParties) { return null; } - const tryNormalizeDomain = normalizeDomain(lineStartsWithSingleDot ? line.slice(1) : line); + const tryNormalizeDomain = normalizeDomain(line); if ( tryNormalizeDomain && ( diff --git a/Build/lib/stable-sort-domain.js b/Build/lib/stable-sort-domain.js index 27c1299d4..4c3138b74 100644 --- a/Build/lib/stable-sort-domain.js +++ b/Build/lib/stable-sort-domain.js @@ -1,14 +1,4 @@ // @ts-check -const tldts = require('tldts'); - -const cache1 = Object.create(null); -/** - * @param {string} url - * @returns {ReturnType} - */ -// eslint-disable-next-line no-return-assign -- cache -const parse = (url) => (cache1[url] ||= tldts.parse(url, { allowPrivateDomains: true })); - /** * @param {string | null} a * @param {string | null} b @@ -49,32 +39,49 @@ const compare = (a, b) => { }; /** - * @param {string} a - * @param {string} b - * @returns {0 | 1 | -1} + * @param {import('gorhill-publicsuffixlist').default | null} [gorhill] */ -const domainSorter = (a, b) => { - if (a === b) return 0; +const createDomainSorter = (gorhill = null) => { + const cached = require('./cached-tld-parse'); - const aParsed = parse(a[0] === '.' ? a.slice(1) : a); - const bParsed = parse(b[0] === '.' ? b.slice(1) : b); + if (gorhill) { + /** + * @param {string} input + */ + const getDomain = cached.createCachedGorhillGetDomain(gorhill); - const resultDomainWithoutSuffix = compare(aParsed.domainWithoutSuffix, bParsed.domainWithoutSuffix); - if (resultDomainWithoutSuffix !== 0) { - return resultDomainWithoutSuffix; - } + /** + * @param {string} a + * @param {string} b + * @returns {0 | 1 | -1} + */ + return (a, b) => { + if (a === b) return 0; - const resultSuffix = compare(aParsed.publicSuffix, bParsed.publicSuffix); - if (resultSuffix !== 0) { - return resultSuffix; - } + const aDomain = getDomain(a); + const bDomain = getDomain(b); - const resultSubdomain = compare(aParsed.subdomain, bParsed.subdomain); - if (resultSubdomain !== 0) { - return resultSubdomain; + const resultDomain = compare(aDomain, bDomain); + return resultDomain !== 0 ? resultDomain : compare(a, b); + }; } - return 0; + const tldts = cached; + /** + * @param {string} a + * @param {string} b + * @returns {0 | 1 | -1} + */ + return (a, b) => { + if (a === b) return 0; + + const aDomain = tldts.parse(a).domain; + const bDomain = tldts.parse(b).domain; + + const resultDomain = compare(aDomain, bDomain); + return resultDomain !== 0 ? resultDomain : compare(a, b); + }; }; -module.exports = domainSorter; +module.exports = createDomainSorter(); +module.exports.createDomainSorter = createDomainSorter; diff --git a/Build/lib/trace-runner.js b/Build/lib/trace-runner.js index 649d2295b..5bd752000 100644 --- a/Build/lib/trace-runner.js +++ b/Build/lib/trace-runner.js @@ -1,15 +1,42 @@ const path = require('path'); +const { performance } = require('perf_hooks'); /** - * @param {Function} fn - * @param {string} __filename + * @template T + * @param {string} prefix + * @param {() => T} fn + * @returns {T} */ -module.exports.runner = async (__filename, fn) => { - const runnerName = path.basename(__filename, path.extname(__filename)); +const traceSync = (prefix, fn) => { + const start = performance.now(); + const result = fn(); + const end = performance.now(); + console.log(`${prefix}: ${(end - start).toFixed(3)}ms`); + return result; +}; +module.exports.traceSync = traceSync; - const start = Date.now(); +/** + * @template T + * @param {string} prefix + * @param {() => Promise} fn + * @returns {Promise} + */ +const traceAsync = async (prefix, fn) => { + const start = performance.now(); const result = await fn(); - const end = Date.now(); - console.log(`⌛ [${runnerName}]: ${end - start}ms`); + const end = performance.now(); + console.log(`${prefix}: ${(end - start).toFixed(3)}ms`); return result; }; +module.exports.traceAsync = traceAsync; + +/** + * @template T + * @param {string} __filename + * @param {() => Promise} fn + * @returns {T} + */ +module.exports.runner = async (__filename, fn) => { + return traceAsync(`⌛ [${path.basename(__filename, path.extname(__filename))}]`, fn); +}; diff --git a/Build/mod.d.ts b/Build/mod.d.ts new file mode 100644 index 000000000..1f3c0032c --- /dev/null +++ b/Build/mod.d.ts @@ -0,0 +1,36 @@ +declare module 'gorhill-publicsuffixlist' { + type Selfie = + | string + | { + magic: number; + buf32: number[]; + }; + interface Decoder { + decode: (bufferStr: string, buffer: ArrayBuffer) => void; + decodeSize: (bufferStr: string) => number; + } + interface Encoder { + encode: (buffer: ArrayBuffer, length: number) => string; + } + export interface PublicSuffixList { + version: string; + + parse(text: string, toAscii: (input: string) => string): void; + + getPublicSuffix(hostname: string): string; + getDomain(hostname: string): string; + + suffixInPSL(hostname: string): boolean; + + toSelfie(encoder?: null | Encoder): Selfie; + fromSelfie(selfie: Selfie, decoder?: null | Decoder): boolean; + + enableWASM(options?: { + customFetch?: null | ((url: URL) => Promise); + }): Promise; + disableWASM(): Promise; + } + + const psl: PublicSuffixList; + export default psl; +} diff --git a/Build/validate-domainset.js b/Build/validate-domainset.js index 332e5cdde..b1f548254 100644 --- a/Build/validate-domainset.js +++ b/Build/validate-domainset.js @@ -1,6 +1,6 @@ // Surge Domain Set can not include root domain from public suffix list. -const tldts = require('tldts'); +const tldts = require('tldts'); // hit ratio way too low, dont cache const picocolors = require('picocolors'); const path = require('path'); const listDir = require('@sukka/listdir'); @@ -21,7 +21,7 @@ const validateDomainSet = async (filePath) => { if (!line) { continue; } - const domain = line.charCodeAt(0) === 46 ? line.slice(1) : line; + const domain = line[0] === '.' ? line.slice(1) : line; const parsed = tldts.parse(domain, { allowPrivateDomains: true, detectIp: false }); if ( diff --git a/package.json b/package.json index a05e2de0f..108ef01b3 100644 --- a/package.json +++ b/package.json @@ -158,9 +158,11 @@ "ci-info": "^3.8.0", "cidr-tools-wasm": "^0.0.11", "fs-extra": "^11.1.1", + "gorhill-publicsuffixlist": "github:gorhill/publicsuffixlist.js", "mnemonist": "^0.39.5", "path-scurry": "^1.10.1", "picocolors": "^1.0.0", + "punycode": "^2.3.0", "table": "^6.8.1", "tar": "^6.2.0", "tldts": "^6.0.14", diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 1c9dea821..3700625e5 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -34,6 +34,9 @@ dependencies: fs-extra: specifier: ^11.1.1 version: 11.1.1 + gorhill-publicsuffixlist: + specifier: github:gorhill/publicsuffixlist.js + version: github.com/gorhill/publicsuffixlist.js/3a1bc623073079184ff76933b88b7bf4f5d48978 mnemonist: specifier: ^0.39.5 version: 0.39.5 @@ -43,6 +46,9 @@ dependencies: picocolors: specifier: ^1.0.0 version: 1.0.0 + punycode: + specifier: ^2.3.0 + version: 2.3.0 table: specifier: ^6.8.1 version: 6.8.1 @@ -1318,8 +1324,8 @@ packages: signal-exit: 3.0.7 dev: true - /punycode@2.1.1: - resolution: {integrity: sha512-XRsRjdf+j5ml+y/6GKHPZbrF/8p2Yga0JPtdqTIY2Xe5ohJPD9saDJJLPvp9+NSBprVvevdXZybnj2cv8OEd0A==} + /punycode@2.3.0: + resolution: {integrity: sha512-rRV+zQD8tVFys26lAGR9WUuS4iUAngJScM+ZRSKtvl5tKeZ2t5bvdNFdNHBW9FWR4guGHlgmsZ1G7BSm2wTbuA==} engines: {node: '>=6'} /queue-microtask@1.2.3: @@ -1571,7 +1577,7 @@ packages: /uri-js@4.4.1: resolution: {integrity: sha512-7rKUyy33Q1yc98pQ1DAmLtwX109F7TIfWlW1Ydo8Wl1ii1SeHieeh0HHfPeL2fMXK6z0s8ecKs9frCuLJvndBg==} dependencies: - punycode: 2.1.1 + punycode: 2.3.0 /webidl-conversions@3.0.1: resolution: {integrity: sha512-2JAn3z8AR6rjK8Sm8orRC0h/bcl/DqL7tRPdGZ4I1CjdF+EaMLmYxBHyXuKL849eucPFhvBoxMsflfOb8kxaeQ==} @@ -1661,3 +1667,10 @@ packages: resolution: {integrity: sha512-rVksvsnNCdJ/ohGc6xgPwyN8eheCxsiLM8mxuE/t/mOVqJewPuO1miLpTHQiRgTKCLexL4MeAFVagts7HmNZ2Q==} engines: {node: '>=10'} dev: true + + github.com/gorhill/publicsuffixlist.js/3a1bc623073079184ff76933b88b7bf4f5d48978: + resolution: {tarball: https://codeload.github.com/gorhill/publicsuffixlist.js/tar.gz/3a1bc623073079184ff76933b88b7bf4f5d48978} + name: '@gorhill/publicsuffixlist' + version: 3.0.1 + engines: {node: '>=14.0.0', npm: '>=6.14.4'} + dev: false