Ten Words Only Still Help: Improving Black-Box AI-Generated Text Detection via Proxy-Guided Efficient Re-Sampling
- Python: 3.11
- CUDA: 11.8
- Python Packages:
# Install the Python packages listed in requirements.txt.
pip install -r requirements.txt
The binary, multiclass and OOD AIGT datasets are available on Google Drive.
This step is optional, as processed POGER Features and POGER-Mixture Features can be downloaded from Google Drive.
# Extract POGER features for the train and test splits.
# The path is relative; presumably this is run from the repository root — confirm.
cd get_feature
# hf_xxx / sk-xxx are placeholders: replace them with real credentials before running.
export HF_TOKEN=hf_xxx # Fill in your HuggingFace access token
export OPENAI_API_KEY=sk-xxx # Fill in your OpenAI API key
# Train split: reads ../data/train.jsonl and writes ./train_poger_feature.jsonl.
# Both runs pass the same --n / --k / --delta values.
python get_poger_feature.py \
--n 100 \
--k 10 \
--delta 1.2 \
--input ../data/train.jsonl \
--output ./train_poger_feature.jsonl
# Test split: reads ../data/test.jsonl and writes ./test_poger_feature.jsonl.
python get_poger_feature.py \
--n 100 \
--k 10 \
--delta 1.2 \
--input ../data/test.jsonl \
--output ./test_poger_feature.jsonl
This part of the code is modified from Jihuai-wpy/SeqXGPT under the Apache License 2.0.
# The path is relative; presumably this is run from the repository root — confirm.
cd get_feature/get_true_prob
# Launch inference servers: one background process per model, each on its own
# port (6001-6005). nohup lets them survive the shell exiting; since no output
# redirection is given, nohup appends their output to nohup.out when run from
# a terminal.
# gpt2 and gptj both use GPU 0; the other models use GPUs 1-3, so this
# presumably expects at least four GPUs — confirm.
nohup python backend_api.py --model gpt2 --gpu 0 --port 6001 &
nohup python backend_api.py --model gptj --gpu 0 --port 6002 &
nohup python backend_api.py --model llama2 --gpu 1 --port 6003 &
nohup python backend_api.py --model alpaca --gpu 2 --port 6004 &
nohup python backend_api.py --model vicuna --gpu 3 --port 6005 &
# Get true probability
# NOTE(review): the servers above are started asynchronously and nothing here
# waits for them; presumably they must be ready before this runs — confirm.
python get_true_prob.py
# Build POGER-Mixture features from the POGER feature files and the
# true-probability results.
# The path is relative; presumably this is run from the repository root — confirm.
cd get_feature
# Train split: writes ./train_poger_mix_feature.jsonl.
python get_poger_mix_feature.py \
--poger-feature ./train_poger_feature.jsonl \
--true-prob ./get_true_prob/result/train_true_prob.jsonl \
--output ./train_poger_mix_feature.jsonl
# Test split: writes ./test_poger_mix_feature.jsonl.
python get_poger_mix_feature.py \
--poger-feature ./test_poger_feature.jsonl \
--true-prob ./get_true_prob/result/test_true_prob.jsonl \
--output ./test_poger_mix_feature.jsonl
# Run main.py once per model variant, reading features from ../get_feature.
# Presumably this is the training step (no --test flag is passed) — confirm.
# The path is relative; presumably this is run from the repository root — confirm.
cd POGER
# POGER
python main.py \
--cuda \
--model poger \
--data-dir ../get_feature \
--data-name full_data
# POGER-Mixture
python main.py \
--cuda \
--model poger_mix \
--data-dir ../get_feature \
--data-name full_data
# Run main.py with --test pointing at a saved parameter file, once per model
# variant. Presumably this evaluates a trained checkpoint — confirm.
# The path is relative; presumably this is run from the repository root — confirm.
cd POGER
# POGER
python main.py \
--cuda \
--model poger \
--data-dir ../get_feature \
--test ./params/params_poger_full_data.pt
# POGER-Mixture
python main.py \
--cuda \
--model poger_mix \
--data-dir ../get_feature \
--test ./params/params_poger_mix_full_data.pt
@inproceedings{shi2024ten,
title={{Ten Words Only Still Help: Improving Black-Box AI-Generated Text Detection via Proxy-Guided Efficient Re-Sampling}},
author={Shi, Yuhui and Sheng, Qiang and Cao, Juan and Mi, Hao and Hu, Beizhe and Wang, Danding},
booktitle={Proceedings of the Thirty-Third International Joint Conference on Artificial Intelligence},
pages={494--502},
doi={10.24963/ijcai.2024/55},
year={2024}
}