Skip to content

Commit

Permalink
Perf: many changes
Browse files Browse the repository at this point in the history
- Hoist the processHosts line callback out of the function body
- Reduce the download size of Dan Pollock's (dp) hosts file
- Reduce redundant comparisons in the domain sort
  • Loading branch information
SukkaW committed May 2, 2024
1 parent 10bde9f commit e5d511d
Show file tree
Hide file tree
Showing 6 changed files with 41 additions and 34 deletions.
9 changes: 2 additions & 7 deletions Build/build-reject-domainset.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,14 +18,9 @@ import { getPhishingDomains } from './lib/get-phishing-domains';

import * as SetHelpers from 'mnemonist/set';
import { setAddFromArray } from './lib/set-add-from-array';
import type { PublicSuffixList } from '@gorhill/publicsuffixlist';

export const buildRejectDomainSet = task(import.meta.path, async (span) => {
const gorhillPromise = getGorhillPublicSuffixPromise();
const gorhillPeeked = Bun.peek(gorhillPromise);
const gorhill: PublicSuffixList = gorhillPeeked === gorhillPromise
? await gorhillPromise
: (gorhillPeeked as PublicSuffixList);
const gorhill = await getGorhillPublicSuffixPromise();

/** Whitelists */
const filterRuleWhitelistDomainSets = new Set(PREDEFINED_WHITELIST);
Expand Down Expand Up @@ -126,7 +121,7 @@ export const buildRejectDomainSet = task(import.meta.path, async (span) => {
const kwfilter = createKeywordFilter(domainKeywordsSet);

for (const domain of domainSets) {
// Remove keyword
// Remove keyword
if (kwfilter(domain)) {
domainSets.delete(domain);
}
Expand Down
4 changes: 3 additions & 1 deletion Build/build-reject-ip-list.ts
Original file line number Diff line number Diff line change
Expand Up @@ -65,8 +65,10 @@ const getBotNetFilterIPsPromise = fsFetchCache.apply(
}
);

const localRejectIPSourcesPromise = readFileIntoProcessedArray(path.resolve(import.meta.dir, '../Source/ip/reject.conf'));

export const buildRejectIPList = task(import.meta.path, async (span) => {
const result: string[] = await readFileIntoProcessedArray(path.resolve(import.meta.dir, '../Source/ip/reject.conf'));
const result = await localRejectIPSourcesPromise;

const bogusNxDomainIPs = await span.traceChildPromise('get bogus nxdomain ips', getBogusNxDomainIPsPromise);
const botNetIPs = await span.traceChildPromise('get botnet ips', getBotNetFilterIPsPromise);
Expand Down
2 changes: 1 addition & 1 deletion Build/build-speedtest-domainset.ts
Original file line number Diff line number Diff line change
Expand Up @@ -232,7 +232,7 @@ export const buildSpeedtestDomainSet = task(import.meta.path, async (span) => {
});

resolve();
}, 1000 * 60 * 2);
}, 1000 * 60 * 1.5);

Promise.all(Object.values(pMap)).then(() => {
clearTimeout(timer);
Expand Down
47 changes: 24 additions & 23 deletions Build/lib/parse-filter.ts
Original file line number Diff line number Diff line change
Expand Up @@ -46,37 +46,38 @@ export function processDomainLists(span: Span, domainListsUrl: string, includeAl
}
));
}
export function processHosts(span: Span, hostsUrl: string, mirrors: string[] | null, includeAllSubDomain = false, ttl: number | null = null) {
const domainSets = new Set<string>();

const lineCb = (l: string) => {
const line = processLine(l);
if (!line) {
return;
}
const hostsLineCb = (l: string, set: Set<string>, includeAllSubDomain: boolean, meta: string) => {
const line = processLine(l);
if (!line) {
return;
}

const _domain = line.split(/\s/)[1]?.trim();
if (!_domain) {
return;
}
const domain = normalizeDomain(_domain);
if (!domain) {
return;
}
if (DEBUG_DOMAIN_TO_FIND && domain.includes(DEBUG_DOMAIN_TO_FIND)) {
console.warn(picocolors.red(hostsUrl), '(black)', domain.replaceAll(DEBUG_DOMAIN_TO_FIND, picocolors.bold(DEBUG_DOMAIN_TO_FIND)));
foundDebugDomain = true;
}
const _domain = line.split(/\s/)[1]?.trim();
if (!_domain) {
return;
}
const domain = normalizeDomain(_domain);
if (!domain) {
return;
}
if (DEBUG_DOMAIN_TO_FIND && domain.includes(DEBUG_DOMAIN_TO_FIND)) {
console.warn(picocolors.red(meta), '(black)', domain.replaceAll(DEBUG_DOMAIN_TO_FIND, picocolors.bold(DEBUG_DOMAIN_TO_FIND)));
foundDebugDomain = true;
}

domainSets.add(includeAllSubDomain ? `.${domain}` : domain);
};
set.add(includeAllSubDomain ? `.${domain}` : domain);
};

export function processHosts(span: Span, hostsUrl: string, mirrors: string[] | null, includeAllSubDomain = false, ttl: number | null = null) {
const domainSets = new Set<string>();

return span.traceChild(`processhosts: ${hostsUrl}`).traceAsyncFn((childSpan) => fsFetchCache.apply(
hostsUrl,
async () => {
if (mirrors == null || mirrors.length === 0) {
for await (const l of await fetchRemoteTextByLine(hostsUrl)) {
lineCb(l);
hostsLineCb(l, domainSets, includeAllSubDomain, hostsUrl);
}
} else {
const filterRules = await childSpan
Expand All @@ -85,7 +86,7 @@ export function processHosts(span: Span, hostsUrl: string, mirrors: string[] | n

childSpan.traceChild('parse hosts').traceSyncFn(() => {
for (let i = 0, len = filterRules.length; i < len; i++) {
lineCb(filterRules[i]);
hostsLineCb(filterRules[i], domainSets, includeAllSubDomain, hostsUrl);
}
});
}
Expand Down
3 changes: 2 additions & 1 deletion Build/lib/reject-data-source.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@ export const HOSTS: HostsSource[] = [
true,
TTL.THREE_HOURS()
],
['https://someonewhocares.org/hosts/hosts', null, true, TTL.THREE_HOURS()],
// Dan Pollock's hosts file; the 0.0.0.0 version is 30 KiB smaller
['https://someonewhocares.org/hosts/zero/hosts', null, true, TTL.THREE_HOURS()],
// The no-coin list is not actively maintained, but it updates daily while it is being maintained, so we set a 3-day cache TTL
['https://raw.githubusercontent.com/hoshsadiq/adblock-nocoin-list/master/hosts.txt', null, true, TTL.THREE_DAYS()],
// have not been updated for more than a year, so we set a 14-day cache TTL
Expand Down
10 changes: 9 additions & 1 deletion Build/lib/stable-sort-domain.ts
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,15 @@ export const sortDomains = (inputs: string[], gorhill: PublicSuffixList) => {

const sorter = (a: string, b: string) => {
if (a === b) return 0;
return compare(domains.get(a)!, domains.get(b)!) || compare(a, b);

const $a = domains.get(a)!;
const $b = domains.get(b)!;

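// Avoid comparing the same pair twice: when both mapped values equal their inputs,
// compare($a, $b) and compare(a, b) are the same call, so one comparison is enough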
if (a === $a && b === $b) {
return compare(a, b);
}
return compare($a, $b) || compare(a, b);
};

return inputs.sort(sorter);
Expand Down

0 comments on commit e5d511d

Please sign in to comment.