Skip to content

Commit

Permalink
Chore: process line stream
Browse files Browse the repository at this point in the history
  • Loading branch information
SukkaW committed Nov 26, 2024
1 parent e2920de commit d0a5847
Show file tree
Hide file tree
Showing 11 changed files with 65 additions and 31 deletions.
8 changes: 2 additions & 6 deletions Build/build-cdn-download-conf.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,18 +5,14 @@ import { task } from './trace';
import { SHARED_DESCRIPTION } from './constants/description';
import { appendArrayInPlace } from './lib/append-array-in-place';
import { SOURCE_DIR } from './constants/dir';
import { processLine } from './lib/process-line';
import { DomainsetOutput } from './lib/create-file';
import { CRASHLYTICS_WHITELIST } from './constants/reject-data-source';

const getS3OSSDomainsPromise = (async (): Promise<string[]> => {
const trie = new HostnameTrie();

for await (const line of await fetchRemoteTextByLine('https://publicsuffix.org/list/public_suffix_list.dat')) {
const tmp = processLine(line);
if (tmp) {
trie.add(tmp);
}
for await (const line of await fetchRemoteTextByLine('https://publicsuffix.org/list/public_suffix_list.dat', true)) {
trie.add(line);
}

/**
Expand Down
5 changes: 2 additions & 3 deletions Build/build-chn-cidr.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import { fetchRemoteTextByLine } from './lib/fetch-text-by-line';
import { processLineFromReadline } from './lib/process-line';
import { task } from './trace';

import { contains as containsCidr, exclude as excludeCidr } from 'fast-cidr-tools';
Expand All @@ -19,8 +18,8 @@ const PROBE_CHN_CIDR_V4 = [
export const getChnCidrPromise = createMemoizedPromise(cachedOnlyFail(
async function getChnCidr() {
const [_cidr4, cidr6] = await Promise.all([
fetchRemoteTextByLine('https://raw.githubusercontent.com/misakaio/chnroutes2/master/chnroutes.txt').then(processLineFromReadline),
fetchRemoteTextByLine('https://gaoyifan.github.io/china-operator-ip/china6.txt').then(processLineFromReadline)
fetchRemoteTextByLine('https://raw.githubusercontent.com/misakaio/chnroutes2/master/chnroutes.txt', true).then(Array.fromAsync<string>),
fetchRemoteTextByLine('https://gaoyifan.github.io/china-operator-ip/china6.txt', true).then(Array.fromAsync<string>)
]);

const cidr4 = excludeCidr(
Expand Down
2 changes: 1 addition & 1 deletion Build/build-reject-ip-list.ts
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ const getBogusNxDomainIPsPromise: Promise<[ipv4: string[], ipv6: string[]]> = $f
const ipv4: string[] = [];
const ipv6: string[] = [];

for await (const line of createReadlineInterfaceFromResponse(resp)) {
for await (const line of createReadlineInterfaceFromResponse(resp, true)) {
if (line.startsWith('bogus-nxdomain=')) {
const ip = line.slice(15).trim();
if (isProbablyIpv4(ip)) {
Expand Down
6 changes: 1 addition & 5 deletions Build/build-telegram-cidr.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
// @ts-check
import { createReadlineInterfaceFromResponse } from './lib/fetch-text-by-line';
import { isProbablyIpv4, isProbablyIpv6 } from './lib/is-fast-ip';
import { processLine } from './lib/process-line';
import { task } from './trace';
import { SHARED_DESCRIPTION } from './constants/description';
import { createMemoizedPromise } from './lib/memo-promise';
Expand All @@ -16,10 +15,7 @@ export const getTelegramCIDRPromise = createMemoizedPromise(async () => {
const ipcidr: string[] = [];
const ipcidr6: string[] = [];

for await (const line of createReadlineInterfaceFromResponse(resp)) {
const cidr = processLine(line);
if (!cidr) continue;

for await (const cidr of createReadlineInterfaceFromResponse(resp, true)) {
const [subnet] = cidr.split('/');
if (isProbablyIpv4(subnet)) {
ipcidr.push(cidr);
Expand Down
3 changes: 1 addition & 2 deletions Build/lib/aho-corasick.bench.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import { fetchRemoteTextByLine } from './fetch-text-by-line';
import { processLineFromReadline } from './process-line';

import createKeywordFilter from './aho-corasick';

Expand Down Expand Up @@ -36,7 +35,7 @@ if (require.main === module) {
(async () => {
const { bench, group, run } = await import('mitata');

const data = await processLineFromReadline(await fetchRemoteTextByLine('https://easylist.to/easylist/easylist.txt'));
const data = await Array.fromAsync(await fetchRemoteTextByLine('https://easylist.to/easylist/easylist.txt', true));
console.log({ dataLen: data.length });
const keywordsSet = [
'!',
Expand Down
15 changes: 10 additions & 5 deletions Build/lib/fetch-text-by-line.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ import readline from 'node:readline';
import { TextLineStream } from './text-line-transform-stream';
import type { ReadableStream } from 'node:stream/web';
import { TextDecoderStream } from 'node:stream/web';
import { processLine } from './process-line';
import { processLine, ProcessLineStream } from './process-line';
import { $fetch } from './make-fetch-happen';
import type { NodeFetchResponse } from './make-fetch-happen';
import type { UndiciResponseData } from './fetch-retry';
Expand Down Expand Up @@ -40,7 +40,7 @@ function ensureResponseBody<T extends NodeFetchResponse | UndiciResponseData | U
return resp.body;
}

export const createReadlineInterfaceFromResponse: ((resp: NodeFetchResponse | UndiciResponseData | UnidiciWebResponse) => AsyncIterable<string>) = (resp) => {
export const createReadlineInterfaceFromResponse: ((resp: NodeFetchResponse | UndiciResponseData | UnidiciWebResponse, processLine?: boolean) => ReadableStream<string>) = (resp, processLine = false) => {
const stream = ensureResponseBody(resp);

const webStream: ReadableStream<Uint8Array> = 'getReader' in stream
Expand All @@ -51,13 +51,18 @@ export const createReadlineInterfaceFromResponse: ((resp: NodeFetchResponse | Un
: Readable.toWeb(new Readable().wrap(stream))
);

return webStream
const resultStream = webStream
.pipeThrough(new TextDecoderStream())
.pipeThrough(new TextLineStream());

if (processLine) {
return resultStream.pipeThrough(new ProcessLineStream());
}
return resultStream;
};

export function fetchRemoteTextByLine(url: string) {
return $fetch(url).then(createReadlineInterfaceFromResponse);
export function fetchRemoteTextByLine(url: string, processLine = false): Promise<AsyncIterable<string>> {
return $fetch(url).then(resp => createReadlineInterfaceFromResponse(resp, processLine));
}

export async function readFileIntoProcessedArray(file: string /* | FileHandle */) {
Expand Down
2 changes: 1 addition & 1 deletion Build/lib/parse-dnsmasq.ts
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ export function extractDomainsFromFelixDnsmasq(line: string): string | null {
export async function parseFelixDnsmasqFromResp(resp: NodeFetchResponse | UndiciResponseData | Response): Promise<string[]> {
const results: string[] = [];

for await (const line of createReadlineInterfaceFromResponse(resp)) {
for await (const line of createReadlineInterfaceFromResponse(resp, true)) {
const domain = extractDomainsFromFelixDnsmasq(line);
if (domain && isDomainLoose(domain)) {
results.push(domain);
Expand Down
46 changes: 44 additions & 2 deletions Build/lib/process-line.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,7 @@ export function processLine(line: string): string | null {
const line_0: string = trimmed[0];

if (
line_0 === '#'
|| line_0 === ' '
line_0 === ' '
|| line_0 === '\r'
|| line_0 === '\n'
|| line_0 === '!'
Expand All @@ -21,6 +20,24 @@ export function processLine(line: string): string | null {
return null;
}

if (line_0 === '#') {
if (trimmed[1] !== '#') {
// # Comment
return null;
}
if (trimmed[2] === '#' && trimmed[3] === '#') {
// ################## EOF ##################
return null;
}
/**
* AdGuard Filter can be:
*
* ##.class
* ##tag.class
* ###id
*/
}

return trimmed;
}

Expand All @@ -34,3 +51,28 @@ export async function processLineFromReadline(rl: AsyncIterable<string>): Promis
}
return res;
}

/**
 * Web-streams transform that passes every incoming line through `processLine`
 * and forwards only the truthy results downstream.
 *
 * Lines for which `processLine` yields a falsy value (it is typed as
 * `string | null`) are dropped, so consumers of the readable side never
 * see them.
 */
export class ProcessLineStream extends TransformStream<string, string> {
  constructor() {
    super({
      transform(rawLine, controller) {
        const processed = processLine(rawLine);
        // Nothing usable on this line: emit nothing and wait for the next chunk.
        if (!processed) {
          return;
        }
        controller.enqueue(processed);
      }
    });
  }
}

// export class ProcessLineNodeStream extends Transform {
// _transform(chunk: string, encoding: BufferEncoding, callback: TransformCallback) {
// // Convert chunk to string and then to uppercase
// const upperCased = chunk.toUpperCase();
// // Push transformed data to readable side
// this.push(upperCased);
// // Call callback when done
// callback();
// }
// }
3 changes: 1 addition & 2 deletions Build/lib/set-add-from-array.bench.ts
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
import { fetchRemoteTextByLine } from './fetch-text-by-line';
import { processLineFromReadline } from './process-line';

import { bench, group, run } from 'mitata';

(async () => {
const data = await processLineFromReadline(await fetchRemoteTextByLine('https://osint.digitalside.it/Threat-Intel/lists/latestdomains.txt'));
const data = await Array.fromAsync(await fetchRemoteTextByLine('https://osint.digitalside.it/Threat-Intel/lists/latestdomains.txt', true));

group(() => {
bench('setAddFromArray', () => {
Expand Down
3 changes: 1 addition & 2 deletions Build/lib/stable-sort-domain.bench.ts
Original file line number Diff line number Diff line change
@@ -1,11 +1,10 @@
import { fetchRemoteTextByLine } from './fetch-text-by-line';
import { processLineFromReadline } from './process-line';
import { sortDomains } from './stable-sort-domain';

import { bench, group, run } from 'mitata';

(async () => {
const data = await processLineFromReadline(await fetchRemoteTextByLine('https://osint.digitalside.it/Threat-Intel/lists/latestdomains.txt'));
const data = await Array.fromAsync(await fetchRemoteTextByLine('https://osint.digitalside.it/Threat-Intel/lists/latestdomains.txt', true));

group(() => {
bench('sortDomains', () => sortDomains(data));
Expand Down
3 changes: 1 addition & 2 deletions Build/lib/tldts.bench.ts
Original file line number Diff line number Diff line change
@@ -1,13 +1,12 @@
import { fetchRemoteTextByLine } from './fetch-text-by-line';
import { processLineFromReadline } from './process-line';

import { bench, group, run } from 'mitata';

import * as tldts from 'tldts';
import * as tldtsExperimental from 'tldts-experimental';

(async () => {
const data = await processLineFromReadline(await fetchRemoteTextByLine('https://osint.digitalside.it/Threat-Intel/lists/latestdomains.txt'));
const data = await Array.fromAsync(await fetchRemoteTextByLine('https://osint.digitalside.it/Threat-Intel/lists/latestdomains.txt', true));

const tldtsOpt: Parameters<typeof tldts.getDomain>[1] = {
allowPrivateDomains: false,
Expand Down

0 comments on commit d0a5847

Please sign in to comment.