Skip to content

Commit

Permalink
Perf: improve reject regex / regex sorting
Browse files Browse the repository at this point in the history
  • Loading branch information
SukkaW committed Apr 23, 2024
1 parent 97f64dc commit cbb22f3
Show file tree
Hide file tree
Showing 7 changed files with 114 additions and 150 deletions.
85 changes: 47 additions & 38 deletions Build/build-mitm-hostname.js → Build/build-mitm-hostname.ts
Original file line number Diff line number Diff line change
@@ -1,16 +1,21 @@
const fsPromises = require('fs').promises;
const pathFn = require('path');
const table = require('table');
const listDir = require('@sukka/listdir');
const { green, yellow } = require('picocolors');
import { readFileByLine } from './lib/fetch-text-by-line';
import fsPromises from 'fs/promises';
import pathFn from 'path';
import table from 'table';
import listDir from '@sukka/listdir';
import { green, yellow } from 'picocolors';
import { processLineFromReadline } from './lib/process-line';
import { getHostname } from 'tldts';

const PRESET_MITM_HOSTNAMES = [
// '*baidu.com',
'*ydstatic.com',
'*.ydstatic.com',
// '*snssdk.com',
'*musical.com',
// '*musical.com',
// '*musical.ly',
// '*snssdk.ly',
'api.zhihu.com',
'www.zhihu.com',
'api.chelaile.net.cn',
'atrace.chelaile.net.cn',
'*.meituan.net',
Expand All @@ -20,8 +25,15 @@ const PRESET_MITM_HOSTNAMES = [
'ctrl.zmzapi.net',
'api.zhuishushenqi.com',
'b.zhuishushenqi.com',
'*.music.126.net',
'*.prod.hosts.ooklaserver.net'
'ggic.cmvideo.cn',
'ggic2.cmvideo.cn',
'mrobot.pcauto.com.cn',
'mrobot.pconline.com.cn',
'home.umetrip.com',
'discardrp.umetrip.com',
'startup.umetrip.com',
'dsp-x.jd.com',
'bdsp-x.jd.com'
];

(async () => {
Expand Down Expand Up @@ -51,7 +63,7 @@ const PRESET_MITM_HOSTNAMES = [
}))
);

const bothWwwApexDomains = [];
const bothWwwApexDomains: Array<{ origin: string, processed: string }> = [];
urlRegexPaths = urlRegexPaths.map(i => {
if (!i.processed.includes('{www or not}')) return i;

Expand All @@ -70,17 +82,21 @@ const PRESET_MITM_HOSTNAMES = [
urlRegexPaths.push(...bothWwwApexDomains);

await Promise.all(rulesets.map(async file => {
const content = (await fsPromises.readFile(pathFn.join(folderListPath, file), { encoding: 'utf-8' })).split('\n');
const content = await processLineFromReadline(readFileByLine(pathFn.join(folderListPath, file)));
urlRegexPaths.push(
...content
.filter(i => i.startsWith('URL-REGEX'))
.filter(i => (
i.startsWith('URL-REGEX')
&& !i.includes('http://')
))
.map(i => i.split(',')[1])
.map(i => ({
origin: i,
processed: i
.replaceAll('^https?://', '')
.replaceAll('^https://', '')
.replaceAll('^http://', '')
.split('/')[0]
.replaceAll('\\.', '.')
.replaceAll('.+', '*')
.replaceAll('\\d', '*')
Expand All @@ -95,21 +111,21 @@ const PRESET_MITM_HOSTNAMES = [
}));

const mitmDomains = new Set(PRESET_MITM_HOSTNAMES); // Special case for parsed failed
const parsedFailures = [];
const parsedFailures = new Set();

const dedupedUrlRegexPaths = [...new Set(urlRegexPaths)];

dedupedUrlRegexPaths.forEach(i => {
const result = parseDomain(i.processed);
const result = getHostnameSafe(i.processed);

if (result.success) {
mitmDomains.add(result.hostname.trim());
if (result) {
mitmDomains.add(result);
} else {
parsedFailures.add(i.origin);
parsedFailures.add(`${i.origin} ${i.processed} ${result}`);
}
});

const mitmDomainsRegExpArray = mitmDomains
const mitmDomainsRegExpArray = Array.from(mitmDomains)
.slice()
.filter(i => {
return i.length > 3
Expand All @@ -128,21 +144,21 @@ const PRESET_MITM_HOSTNAMES = [
);
});

const parsedDomainsData = [];
const parsedDomainsData: Array<[string, string]> = [];
dedupedUrlRegexPaths.forEach(i => {
const result = parseDomain(i.processed);
const result = getHostnameSafe(i.processed);

if (result.success) {
if (matchWithRegExpArray(result.hostname.trim(), mitmDomainsRegExpArray)) {
parsedDomainsData.push([green(result.hostname), i.origin]);
if (result) {
if (matchWithRegExpArray(result, mitmDomainsRegExpArray)) {
parsedDomainsData.push([green(result), i.origin]);
} else {
parsedDomainsData.push([yellow(result.hostname), i.origin]);
parsedDomainsData.push([yellow(result), i.origin]);
}
}
});

console.log('Mitm Hostnames:');
console.log(`hostname = %APPEND% ${mitmDomains.join(', ')}`);
console.log(`hostname = %APPEND% ${Array.from(mitmDomains).join(', ')}`);
console.log('--------------------');
console.log('Parsed Sucessed:');
console.log(table.table(parsedDomainsData, {
Expand All @@ -159,21 +175,14 @@ const PRESET_MITM_HOSTNAMES = [
})();

/** Util function */
function parseDomain(input) {
try {
const url = new URL(`https://${input}`);
return {
success: true,
hostname: url.hostname
};
} catch {
return {
success: false
};
}

/**
 * Extracts the hostname of `input` with tldts' `getHostname` and rejects
 * results that contain unexpected characters.
 *
 * @param input The (already pre-processed) URL pattern to read a hostname from.
 * @returns The hostname as reported by `getHostname`, or `null` when that
 * hostname contains any character other than whitespace, word characters,
 * `*`, `.` or `-`. A falsy result from `getHostname` is passed through as is.
 */
function getHostnameSafe(input: string) {
  const hostname = getHostname(input);

  // Nothing was extracted: hand the falsy value back unchanged.
  if (!hostname) return hostname;

  // Leftover regex syntax (brackets, parentheses, `?`, ...) means the pattern
  // could not be reduced to a usable hostname.
  return /[^\s\w*.-]/.test(hostname) ? null : hostname;
}

function matchWithRegExpArray(input, regexps = []) {
function matchWithRegExpArray(input: string, regexps: RegExp[] = []) {
for (const r of regexps) {
if (r.test(input)) return true;
}
Expand Down
31 changes: 26 additions & 5 deletions Build/lib/create-file.ts
Original file line number Diff line number Diff line change
Expand Up @@ -115,16 +115,37 @@ const sortTypeOrder: Record<string | typeof defaultSortTypeOrder, number> = {
'USER-AGENT': 30,
'PROCESS-NAME': 40,
[defaultSortTypeOrder]: 50, // default sort order for unknown type
AND: 100,
OR: 100,
'IP-CIDR': 200,
'IP-CIDR6': 200
'URL-REGEX': 100,
AND: 300,
OR: 300,
'IP-CIDR': 400,
'IP-CIDR6': 400
};
/**
 * Returns the rules of `ruleSet` ordered by ascending sort weight.
 *
 * Weight of a rule:
 * - `10` when `collectType` yields no type for it;
 * - the `defaultSortTypeOrder` entry of `sortTypeOrder` when its type has no
 *   entry of its own;
 * - otherwise the `sortTypeOrder` entry for its type.
 *
 * `URL-REGEX` rules additionally get `+10` when the rule contains `.+` or `.*`
 * and `+1` when it contains `|`, so plain patterns are placed ahead of
 * wildcard / alternation patterns within the `URL-REGEX` group.
 *
 * The input array is left untouched: sorting happens on the array produced by
 * `map`.
 *
 * @param ruleSet The rule lines to order.
 * @returns A new array holding the same rule strings, sorted by weight.
 */
export const sortRuleSet = (ruleSet: string[]) => ruleSet
  .map((rule) => {
    const type = collectType(rule);
    if (!type) {
      return [10, rule] as const;
    }
    if (!(type in sortTypeOrder)) {
      return [sortTypeOrder[defaultSortTypeOrder], rule] as const;
    }
    if (type === 'URL-REGEX') {
      // NOTE(review): the extra weight is at most 11; this presumes the next
      // tier in `sortTypeOrder` sits further away than that from the
      // `URL-REGEX` base weight - confirm against the table.
      let extraWeight = 0;
      if (rule.includes('.+') || rule.includes('.*')) {
        extraWeight += 10;
      }
      if (rule.includes('|')) {
        extraWeight += 1;
      }

      return [
        sortTypeOrder[type] + extraWeight,
        rule
      ] as const;
    }
    return [sortTypeOrder[type], rule] as const;
  })
  .sort((a, b) => a[0] - b[0])
  .map(c => c[1]);
Expand Down
2 changes: 1 addition & 1 deletion Modules/sukka_mitm_hostnames.sgmodule
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,4 @@

[MITM]

hostname = %APPEND% update.pan.baidu.com, c.tieba.baidu.com, cover.baidu.com, *ydstatic.com, api.chelaile.net.cn, atrace.chelaile.net.cn, *.meituan.net, ctrl.playcvn.com, ctrl.playcvn.net, ctrl.zmzapi.com, ctrl.zmzapi.net, api.zhuishushenqi.com, b.zhuishushenqi.com, *.music.126.net, *.prod.hosts.ooklaserver.net, api.abema.io, g.cn, google.cn, ign.xn--fiqs8s, abbyychina.com, bartender.cc, betterzip.net, beyondcompare.cc, bingdianhuanyuan.cn, chemdraw.com.cn, codesoftchina.com, coreldrawchina.com, crossoverchina.com, easyrecoverychina.com, ediuschina.com, flstudiochina.com, formysql.com, guitarpro.cc, huishenghuiying.com.cn, iconworkshop.cn, imindmap.cc, jihehuaban.com.cn, keyshot.cc, mathtype.cn, mindmanager.cc, mindmapper.cc, mycleanmymac.com, nicelabel.cc, ntfsformac.cc, ntfsformac.cn, overturechina.com, passwordrecovery.cn, pdfexpert.cc, ultraiso.net, vegaschina.cn, xmindchina.net, xshellcn.com, yuanchengxiezuo.com, zbrushcn.com, aweme-eagle*.snssdk.com, union.click.jd.com, gw.alicdn.com, www.g.cn, www.google.cn, www.ign.xn--fiqs8s, www.abbyychina.com, www.bartender.cc, www.betterzip.net, www.beyondcompare.cc, www.bingdianhuanyuan.cn, www.chemdraw.com.cn, www.codesoftchina.com, www.coreldrawchina.com, www.crossoverchina.com, www.easyrecoverychina.com, www.ediuschina.com, www.flstudiochina.com, www.formysql.com, www.guitarpro.cc, www.huishenghuiying.com.cn, www.iconworkshop.cn, www.imindmap.cc, www.jihehuaban.com.cn, www.keyshot.cc, www.mathtype.cn, www.mindmanager.cc, www.mindmapper.cc, www.mycleanmymac.com, www.nicelabel.cc, www.ntfsformac.cc, www.ntfsformac.cn, www.overturechina.com, www.passwordrecovery.cn, www.pdfexpert.cc, www.ultraiso.net, www.vegaschina.cn, www.xmindchina.net, www.xshellcn.com, www.yuanchengxiezuo.com, www.zbrushcn.com, premiumyva.appspot.com, service.4gtv.tv, issuecdn.baidupcs.com, app.bilibili.com, api.bilibili.com, asp.cntv.myalicdn.com, cntv.hls.cdn.myqcloud.com, v.cctv.com, www.cntv.cn, img-ys011.didistatic.com, act.vip.iqiyi.com, 
iface.iqiyi.com, counter.ksosoft.com, *.kingsoft-office-service.com, dict-mobile.iciba.com, ios.wps.cn, mobile-pic.cache.iciba.com, service.iciba.com, iad.*mat.*.126.net, iad.*mat.*.127.net, client.mail.163.com, c.m.163.com, dsp-impr2.youdao.com, oimage*.ydstatic.com, sp.kaola.com, support.you.163.com, agent-count.pconline.com.cn, mrobot.pcauto.com.cn, mrobot.pconline.com.cn, edit.sinaapp.com, tqt.weibo.cn, sdkapp.uve.weibo.com, wbapp.uve.weibo.com, api.k.sohu.com, api.tv.sohu.com, hui.sohu.com, pic.k.sohu.com, s1.api.tv.itc.cn, api.m.jd.com, dsp-x.jd.com, bdsp-x.jd.com, ms.jr.jd.com, huichuan.sm.cn, iflow.uczzd.cn, mp.weixin.qq.com, adse.ximalaya.com, fdfs.xmcdn.com, www.zhihu.com, api.zhihu.com, *.58cdn.com.cn, app.58.com, aes.acfun.cn, dsp.toutiao.com, nnapp.cloudbae.cn, gw.aihuishou.com, m*.amap.com, 7n.bczcdn.com, www.myhug.cn, app.api.ke.com, channel.beitaichufang.com, iapi.bishijie.com, api.intsig.net, cap.caocaokeji.cn, pic1.chelaile.net.cn, app.10086.cn, m.client.10010.com, www.dandanzan.com, mapi.dangdang.com, api.daydaycook.com.cn, cms.daydaycook.com.cn, mobile-api2011.elong.com, www.facebook.com, acs.m.taobao.com, www.flyertea.com, foodie-api.yiruikecorp.com, cdn.api.fotoable.com, gateway.shouqiev.com, m.ibuscloud.com, smkmp.96225.com, games.mobileapi.hupu.com, imeclient.openspeech.cn, img.jiemian.com, api.jxedt.com, richmanapi.jxedt.com, static1.keepcdn.com, api.gotokeep.com, res.kfc.com.cn, render.alipay.com, api.kkmh.com, gw-passenger.01zhuanche.com, api.smzdm.com, snailsleep.net, a.sfansclub.com, api5.futunn.com, qt.qq.com, ssl.kohsocialapp.qq.com, 3gimg.qq.com, newsso.map.qq.com, r.inews.qq.com, vv.video.qq.com, adpai.thepaper.cn, images.client.vip.xunlei.com, 47.97.20.12, api.gaoqingdianshi.com, pss.txffp.com, app.variflight.com, static.vuevideo.net, api.wallstreetcn.com, app.wy.guahao.com, overseas.weico.cc, thor.weidian.com, nochange.ggsafe.com, cmsapi.wifi8.com, api-release.wuta-cam.com, res-release.wuta-cam.com, api.xiachufang.com, 
mapi.mafengwo.cn, mob.mddcloud.com.cn, mangaapi.manhuaren.com, capi.mwee.cn, api.m.mi.com, api.jr.mi.com, api-mifit.huami.com, b-api.ins.miaopai.com, ggic.cmvideo.cn, ggic2.cmvideo.cn, app.mixcapp.com, api.mgzf.com, cdn.moji.com, dili.bdatu.com, wap.ngchina.cn, supportda.ofo.com, ma.ofo.com, activity2.api.ofo.com, app3.qdaily.com, notch.qdaily.com, media.qyer.com, open.qyer.com, api.qiuduoduo.cn, api.rr.tv, api.videozhishi.com, msspjh.emarbox.com, www.shihuo.cn, api.psy-1.com, portal-xunyou.qingcdn.com, m.yap.yahoo.com, i.ys7.com, api.catch.gift, *.iydsj.com, a.qiumibao.com, api01pbmp.zhuishushenqi.com, dspsdk.abreader.com, mi.gdt.qq.com, y.gtimg.cn, nomo.dafork.com, manga.bilibili.com
hostname = %APPEND% *.ydstatic.com, api.chelaile.net.cn, atrace.chelaile.net.cn, *.meituan.net, ctrl.playcvn.com, ctrl.playcvn.net, ctrl.zmzapi.com, ctrl.zmzapi.net, api.zhuishushenqi.com, b.zhuishushenqi.com, ggic.cmvideo.cn, ggic2.cmvideo.cn, mrobot.pcauto.com.cn, mrobot.pconline.com.cn, home.umetrip.com, discardrp.umetrip.com, startup.umetrip.com, dsp-x.jd.com, bdsp-x.jd.com, api.abema.io, g.cn, google.cn, ign.xn--fiqs8s, abbyychina.com, bartender.cc, betterzip.net, beyondcompare.cc, bingdianhuanyuan.cn, chemdraw.com.cn, codesoftchina.com, coreldrawchina.com, crossoverchina.com, easyrecoverychina.com, ediuschina.com, flstudiochina.com, formysql.com, guitarpro.cc, huishenghuiying.com.cn, iconworkshop.cn, imindmap.cc, jihehuaban.com.cn, keyshot.cc, mathtype.cn, mindmanager.cc, mindmapper.cc, mycleanmymac.com, nicelabel.cc, ntfsformac.cc, ntfsformac.cn, overturechina.com, passwordrecovery.cn, pdfexpert.cc, ultraiso.net, vegaschina.cn, xmindchina.net, xshellcn.com, yuanchengxiezuo.com, zbrushcn.com, union.click.jd.com, nomo.dafork.com, www.g.cn, www.google.cn, www.ign.xn--fiqs8s, www.abbyychina.com, www.bartender.cc, www.betterzip.net, www.beyondcompare.cc, www.bingdianhuanyuan.cn, www.chemdraw.com.cn, www.codesoftchina.com, www.coreldrawchina.com, www.crossoverchina.com, www.easyrecoverychina.com, www.ediuschina.com, www.flstudiochina.com, www.formysql.com, www.guitarpro.cc, www.huishenghuiying.com.cn, www.iconworkshop.cn, www.imindmap.cc, www.jihehuaban.com.cn, www.keyshot.cc, www.mathtype.cn, www.mindmanager.cc, www.mindmapper.cc, www.mycleanmymac.com, www.nicelabel.cc, www.ntfsformac.cc, www.ntfsformac.cn, www.overturechina.com, www.passwordrecovery.cn, www.pdfexpert.cc, www.ultraiso.net, www.vegaschina.cn, www.xmindchina.net, www.xshellcn.com, www.yuanchengxiezuo.com, www.zbrushcn.com, premiumyva.appspot.com, cover.baidu.com, c.tieba.baidu.com, issuecdn.baidupcs.com, update.pan.baidu.com, app.bilibili.com, www.cntv.cn, img-ys011.didistatic.com, 
act.vip.iqiyi.com, iface.iqiyi.com, counter.ksosoft.com, ios.wps.cn, mobile-pic.cache.iciba.com, service.iciba.com, client.mail.163.com, c.m.163.com, dsp-impr2.youdao.com, sp.kaola.com, support.you.163.com, agent-count.pconline.com.cn, tqt.weibo.cn, edit.sinaapp.com, wbapp.uve.weibo.com, api.k.sohu.com, api.tv.sohu.com, hui.sohu.com, s1.api.tv.itc.cn, b, api.m.jd.com, ms.jr.jd.com, huichuan.sm.cn, iflow.uczzd.cn, adse.ximalaya.com, www.zhihu.com, app.58.com, aes.acfun.cn, acs.m.taobao.com, dsp.toutiao.com, nnapp.cloudbae.cn, gw.aihuishou.com, m*.amap.com, 7n.bczcdn.com, www.myhug.cn, app.api.ke.com, channel.beitaichufang.com, iapi.bishijie.com, api.intsig.net, cap.caocaokeji.cn, pic1.chelaile.net.cn, m.client.10010.com, www.dandanzan.com, mapi.dangdang.com, api.daydaycook.com.cn, cms.daydaycook.com.cn, www.flyertea.com, cdn.api.fotoable.com, gateway.shouqiev.com, m.ibuscloud.com, smkmp.96225.com, imeclient.openspeech.cn, img.jiemian.com, api.jxedt.com, richmanapi.jxedt.com, api.gotokeep.com, res.kfc.com.cn, api.smzdm.com, api5.futunn.com, qt.qq.com, 3gimg.qq.com, newsso.map.qq.com, vv.video.qq.com, szextshort.weixin.qq.com, y.gtimg.cn, api.gaoqingdianshi.com, pss.txffp.com, app.variflight.com, static.vuevideo.net, api.wallstreetcn.com, app.wy.guahao.com, thor.weidian.com, nochange.ggsafe.com, api-release.wuta-cam.com, res-release.wuta-cam.com, api.xiachufang.com, mapi.mafengwo.cn, mangaapi.manhuaren.com, img.meituan.net, capi.mwee.cn, api.m.mi.com, b-api.ins.miaopai.com, app.mixcapp.com, api.mgzf.com, dili.bdatu.com, wap.ngchina.cn, app3.qdaily.com, notch.qdaily.com, media.qyer.com, api.qiuduoduo.cn, api.rr.tv, api.videozhishi.com, msspjh.emarbox.com, www.shihuo.cn, api.psy-1.com, m.yap.yahoo.com, i.ys7.com, api.catch.gift, a.qiumibao.com, api01pbmp.zhuishushenqi.com, dspsdk.abreader.com, mi.gdt.qq.com, service.4gtv.tv, api.bilibili.com, manga.bilibili.com, sdkapp.uve.weibo.com, api.zhihu.com, app.10086.cn, mobile-api2011.elong.com, foodie-api.yiruikecorp.com, 
gw-passenger.01zhuanche.com, snailsleep.net, a.sfansclub.com, r.inews.qq.com, mp.weixin.qq.com, cmsapi.wifi8.com, mob.mddcloud.com.cn, p*.meituan.net, api.jr.mi.com, home.mi.com, api-mifit.huami.com, cdn.moji.com, open.qyer.com, portal-xunyou.qingcdn.com, asp.cntv.myalicdn.com, cntv.hls.cdn.myqcloud.com, v.cctv.com, *.kingsoft-office-service.com, oimage*.ydstatic.com, *.58cdn.com.cn, static1.keepcdn.com, adpai.thepaper.cn, images.client.vip.xunlei.com, s3plus.meituan.net, *.iydsj.com, dict-mobile.iciba.com, games.mobileapi.hupu.com, api.kkmh.com
5 changes: 1 addition & 4 deletions Modules/sukka_url_rewrite.sgmodule
Original file line number Diff line number Diff line change
Expand Up @@ -53,10 +53,7 @@

# Special AD Block Section

# >> Kugou
^https?://(2(5[0-5]{1}|[0-4]\d{1})|[0-1]?\d{1,2})(\.(2(5[0-5]{1}|[0-4]\d{1})|[0-1]?\d{1,2})){3}/EcomResourceServer/AdPlayPage/adinfo - reject
^https?://(2(5[0-5]{1}|[0-4]\d{1})|[0-1]?\d{1,2})(\.(2(5[0-5]{1}|[0-4]\d{1})|[0-1]?\d{1,2})){3}/MobileAdServer/ - reject
# >> eLong
^https?://(2(5[0-5]{1}|[0-4]\d{1})|[0-1]?\d{1,2})(\.(2(5[0-5]{1}|[0-4]\d{1})|[0-1]?\d{1,2})){3}/(adgateway|adv)/ - reject
^https?://\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}/(adgateway|adv)/ - reject
# >> NOMO
^https?://nomo.dafork.com/api/v3/iap/ios_product_list https://ruleset.skk.moe/Mock/nomo.json 302
1 change: 0 additions & 1 deletion Source/domainset/reject_sukka.conf
Original file line number Diff line number Diff line change
Expand Up @@ -747,7 +747,6 @@ pixel.wp.com
.cloudflareinsights.com
.histats.com
.appmetrica.yandex.net
.crazyegg.com
trace2.rtbasia.com
inside.rtbasia.com
.atom-data.io
Expand Down
1 change: 1 addition & 0 deletions Source/non_ip/domestic.conf
Original file line number Diff line number Diff line change
Expand Up @@ -542,6 +542,7 @@ DOMAIN-SUFFIX,qichacha.com
DOMAIN-SUFFIX,qdaily.com
DOMAIN-SUFFIX,qidian.com
DOMAIN-SUFFIX,qiniu.com
DOMAIN-SUFFIX,qingcdn.com
DOMAIN-SUFFIX,qyer.com
DOMAIN-SUFFIX,qyerstatic.com
DOMAIN-SUFFIX,ronghub.com
Expand Down
Loading

0 comments on commit cbb22f3

Please sign in to comment.