From 9aed9f417178e1fefd744b8eac83a1372eafdd4d Mon Sep 17 00:00:00 2001 From: yangheng95 Date: Wed, 14 Sep 2022 23:01:06 +0100 Subject: [PATCH 01/12] add_defense --- .../reactive_defense/checkpoints-v1.16.json | 1 + .../reactive_defense/sst2_reactive_defense.py | 136 ++++++++++++++++++ requirements.txt | 1 + textattack/attacker.py | 36 ++++- .../universal_sentence_encoder/__init__.py | 1 - ...multilingual_universal_sentence_encoder.py | 19 ++- .../universal_sentence_encoder.py | 22 +-- textattack/dataset_args.py | 12 +- textattack/model_args.py | 23 ++- textattack/models/wrappers/__init__.py | 1 + .../models/wrappers/pyabsa_model_wrapper.py | 30 ++++ .../reactive_defense/reactive_defender.py | 20 +++ .../reactive_defense/tad_reactive_defender.py | 33 +++++ textattack/trainer.py | 94 ++++++------ 14 files changed, 359 insertions(+), 70 deletions(-) create mode 100644 examples/reactive_defense/checkpoints-v1.16.json create mode 100644 examples/reactive_defense/sst2_reactive_defense.py create mode 100644 textattack/models/wrappers/pyabsa_model_wrapper.py create mode 100644 textattack/reactive_defense/reactive_defender.py create mode 100644 textattack/reactive_defense/tad_reactive_defender.py diff --git a/examples/reactive_defense/checkpoints-v1.16.json b/examples/reactive_defense/checkpoints-v1.16.json new file mode 100644 index 000000000..0b518b3ce --- /dev/null +++ b/examples/reactive_defense/checkpoints-v1.16.json @@ -0,0 +1 @@ +{"1.16.0+": {"APC": {"english": {"id": "https://drive.google.com/file/d/1JIFhaAdoCeZI5CQqfOix3pnrYa6_Mf9S/view?usp=sharing", "Training Model": "FAST-LSA-S", "Training Dataset": "English", "Language": "English", "Description": "Trained on RTX3090", "Available Version": "1.6.3+", "Checkpoint File": "fast_lsa_s_acc_84.9_f1_82.11.zip", "Author": "H, Yang (yangheng@m.scnu.edu.cn)"}, "chinese": {"id": "https://drive.google.com/file/d/1B0RHazOCm2eOWLWExQkeapHr9d3OiZl7/view?usp=sharing", "Training Model": "FAST-LCF-MDeBERTa", "Training Dataset": "Chinese", "Language": "Chinese", "Description": "Trained on RTX3090", "Available Version": "1.8.2+", "Checkpoint File": "fast_lcf_bert_Chinese_acc_97.11_f1_96.54.zip", "Author": "H, Yang (yangheng@m.scnu.edu.cn)"}, "multilingual": {"id": "https://drive.google.com/file/d/15ls1hcGvk27UnfMsXZtk780h7tTBbNws/view?usp=sharing", "Training Model": "FAST-LCF-Deberta", "Training Dataset": "Multilingual", "Language": "Multilingual", "Description": "Trained on RTX3090", "Available Version": "1.8.2+", "Checkpoint File": "fast_lcf_bert_Multilingual_acc_94.72_f1_90.07.zip", "Author": "H, Yang (yangheng@m.scnu.edu.cn)"}, "multilingual2": {"id": "https://drive.google.com/file/d/1YCYKpEnff-DBUHd-8vWmgsnng5ZT_2cs/view?usp=sharing", "Training Model": "FAST-LCF-Deberta", "Training Dataset": "Multilingual", "Language": "Multilingual", "Description": "Trained on RTX3090", "Available Version": "1.10.5+", "Checkpoint File": "fast_lsa_t_v2_Multilingual_acc_88.44_f1_82.66.zip", "Author": "H, Yang (yangheng@m.scnu.edu.cn)"}, "dlcf-dca-bert1": {"id": "https://drive.google.com/file/d/1w-NtWujPglsvZu4-jC6Vmu8Iz8CvX-1u/view?usp=sharing", "Training Model": "dlcf-dca-bert", "Training Dataset": "Laptop 14", "Language": "English", "Description": "Trained on T4", "Available Version": "1.1.17-1.1.23", "Author": "M, Xu (xumayi@m.scnu.edu.cn)"}, "dlcf-dca-bert2": {"id": "https://drive.google.com/file/d/1w-NtWujPglsvZu4-jC6Vmu8Iz8CvX-1u/view?usp=sharing", "Training Model": "dlcf-dca-bert", "Training Dataset": "Restaurant 14", "Language": "English", "Description": "Trained 
on T4", "Available Version": "1.1.17-1.1.23", "Author": "M, Xu (xumayi@m.scnu.edu.cn)"}, "HELP-WANTED": {"id": "", "Description": "You can help us by sharing checkpoints (e.g. models trained on you own datasets) with community.", "Checkpoint File": "PLEASE NOTE THAT THIS IS NOT A REAL CHECKPOINT!", "Available Version": ""}}, "ATEPC": {"english": {"id": "", "Training Model": "FAST-LCFS-ATEPC", "Training Dataset": "English", "Language": "English", "Description": "Trained on RTX3090, this checkpoint use bert-spc in ATEPC training", "Available Version": "1.16.0+", "Checkpoint File": "fast_lcf_atepc_English_cdw_apcacc_85.4_apcf1_82.53_atef1_80.19.zip", "Author": "H, Yang (yangheng@m.scnu.edu.cn)"}, "chinese": {"id": "", "Training Model": "FAST-LCF-ATEPC", "Training Dataset": "Chinese", "Language": "Chinese", "Description": "Trained on RTX3090 BERT-BASE-CHINESE", "Available Version": "1.16.0+", "Checkpoint File": "fast_lcf_atepc_Chinese_cdw_apcacc_96.09_apcf1_95.14_atef1_83.69.zip", "Author": "H, Yang (yangheng@m.scnu.edu.cn)"}, "multilingual": {"id": "", "Training Model": "FAST-LCF-ATEPC", "Training Dataset": "ABSADatasets.Multilingual", "Language": "Multilingual", "Description": "Trained on RTX3090", "Available Version": "1.16.0+", "Checkpoint File": "fast_lcf_atepc_Multilingual_cdw_apcacc_86.76_apcf1_79.78_atef1_78.03.zip", "Author": "H, Yang (yangheng@m.scnu.edu.cn)"}, "multilingual-256": {"id": "", "Training Model": "FAST-LCF-ATEPC", "Training Dataset": "ABSADatasets.Multilingual", "Language": "Multilingual", "Description": "Trained on RTX3090", "Available Version": "1.16.0+", "Checkpoint File": "fast_lcf_atepc_Multilingual_cdw_apcacc_87.09_apcf1_79.95_atef1_75.55.zip", "Author": "H, Yang (yangheng@m.scnu.edu.cn)"}, "multilingual-256-2": {"id": "", "Training Model": "FAST-LCF-ATEPC", "Training Dataset": "ABSADatasets.Multilingual", "Language": "Multilingual", "Description": "Trained on RTX3090", "Available Version": "1.16.0+", "Checkpoint File": "fast_lcf_atepc_Multilingual_cdw_apcacc_86.68_apcf1_80.63_atef1_75.15.zip", "Author": "H, Yang (yangheng@m.scnu.edu.cn)"}, "HELP-WANTED": {"id": "", "Description": "You can help us by sharing checkpoints (e.g. models trained on you own datasets) with community.", "Checkpoint File": "PLEASE NOTE THAT THIS IS NOT A REAL CHECKPOINT!", "Available Version": ""}}, "TC": {"tc-sst2": {"id": "https://drive.google.com/file/d/1KgEB7MJ8bjrBiYdbtiojreeYegnUtq51/view?usp=sharing", "Training Model": "TAD", "Training Dataset": "SST2", "Language": "English", "Description": "Trained on RTX3090", "Available Version": "1.15+", "Checkpoint File": "TC-SST2.zip", "Author": "H, Yang (yangheng@m.scnu.edu.cn)"}, "tc-agnews10k": {"id": "https://drive.google.com/file/d/1JiRL19maSBiu9_1VBpP1gMwN8qkZsAtC/view?usp=sharing", "Training Model": "TAD", "Training Dataset": "AGNews", "Language": "English", "Description": "Trained on RTX3090", "Available Version": "1.15+", "Checkpoint File": "TC-AGNews10K.zip", "Author": "H, Yang (yangheng@m.scnu.edu.cn)"}, "tc-imdb10k": {"id": "https://drive.google.com/file/d/1TD0xyKKEfs_S0Ze0EmEb16Eq5pdukjN_/view?usp=sharing", "Training Model": "TAD", "Training Dataset": "IMDB10K", "Language": "English", "Description": "Trained on RTX3090", "Available Version": "1.15+", "Checkpoint File": "TC-IMDB10K.zip", "Author": "H, Yang (yangheng@m.scnu.edu.cn)"}, "HELP-WANTED": {"id": "", "Description": "You can help us by sharing checkpoints (e.g. 
models trained on you own datasets) with community.", "Checkpoint File": "PLEASE NOTE THAT THIS IS NOT A REAL CHECKPOINT!", "Available Version": ""}}, "TAD": {"tad-sst2": {"id": "", "Training Model": "TAD", "Training Dataset": "SST2", "Language": "English", "Description": "Trained on RTX3090", "Available Version": "1.15+", "Checkpoint File": "TAD-SST2.zip", "Author": "H, Yang (yangheng@m.scnu.edu.cn)"}, "tad-sst2bae": {"id": "", "Training Model": "TAD", "Training Dataset": "SST2", "Language": "English", "Description": "Trained on RTX3090", "Available Version": "1.15+", "Checkpoint File": "TAD-SST2BAE.zip", "Author": "H, Yang (yangheng@m.scnu.edu.cn)"}, "tad-sst2pwws": {"id": "", "Training Model": "TAD", "Training Dataset": "SST2", "Language": "English", "Description": "Trained on RTX3090", "Available Version": "1.15+", "Checkpoint File": "TAD-SST2PWWS.zip", "Author": "H, Yang (yangheng@m.scnu.edu.cn)"}, "tad-sst2textfooler": {"id": "", "Training Model": "TAD", "Training Dataset": "SST2", "Language": "English", "Description": "Trained on RTX3090", "Available Version": "1.15+", "Checkpoint File": "TAD-SST2TextFooler.zip", "Author": "H, Yang (yangheng@m.scnu.edu.cn)"}, "tad-bert-sst2": {"id": "", "Training Model": "TAD", "Training Dataset": "SST2", "Language": "English", "Description": "Trained on RTX3090", "Available Version": "1.15+", "Checkpoint File": "TAD-BERT-SST2.zip", "Author": "H, Yang (yangheng@m.scnu.edu.cn)"}, "tad-bert-sst2bae": {"id": "", "Training Model": "TAD", "Training Dataset": "SST2", "Language": "English", "Description": "Trained on RTX3090", "Available Version": "1.15+", "Checkpoint File": "TAD-BERT-SST2BAE.zip", "Author": "H, Yang (yangheng@m.scnu.edu.cn)"}, "tad-bert-sst2pwws": {"id": "", "Training Model": "TAD", "Training Dataset": "SST2", "Language": "English", "Description": "Trained on RTX3090", "Available Version": "1.15+", "Checkpoint File": "TAD-BERT-SST2PWWS.zip", "Author": "H, Yang (yangheng@m.scnu.edu.cn)"}, "tad-bert-sst2textfooler": {"id": "", "Training Model": "TAD", "Training Dataset": "SST2", "Language": "English", "Description": "Trained on RTX3090", "Available Version": "1.15+", "Checkpoint File": "TAD-BERT-SST2TextFooler.zip", "Author": "H, Yang (yangheng@m.scnu.edu.cn)"}, "tad-agnews10k": {"id": "", "Training Model": "TAD", "Training Dataset": "AGNews", "Language": "English", "Description": "Trained on RTX3090", "Available Version": "1.15+", "Checkpoint File": "TAD-AGNews10K.zip", "Author": "H, Yang (yangheng@m.scnu.edu.cn)"}, "tad-agnews10kbae": {"id": "", "Training Model": "TAD", "Training Dataset": "AGNews", "Language": "English", "Description": "Trained on RTX3090", "Available Version": "1.15+", "Checkpoint File": "TAD-AGNews10KBAE.zip", "Author": "H, Yang (yangheng@m.scnu.edu.cn)"}, "tad-agnews10kpwws": {"id": "", "Training Model": "TAD", "Training Dataset": "AGNews", "Language": "English", "Description": "Trained on RTX3090", "Available Version": "1.15+", "Checkpoint File": "TAD-AGNews10KPWWS.zip", "Author": "H, Yang (yangheng@m.scnu.edu.cn)"}, "tad-agnews10ktextfooler": {"id": "", "Training Model": "TAD", "Training Dataset": "AGNews", "Language": "English", "Description": "Trained on RTX3090", "Available Version": "1.15+", "Checkpoint File": "TAD-AGNews10KTextFooler.zip", "Author": "H, Yang (yangheng@m.scnu.edu.cn)"}, "tad-bert-agnews10k": {"id": "", "Training Model": "TAD", "Training Dataset": "AGNews", "Language": "English", "Description": "Trained on RTX3090", "Available Version": "1.15+", "Checkpoint File": "TAD-BERT-AGNews10K.zip", 
"Author": "H, Yang (yangheng@m.scnu.edu.cn)"}, "tad-bert-agnews10kbae": {"id": "", "Training Model": "TAD", "Training Dataset": "AGNews", "Language": "English", "Description": "Trained on RTX3090", "Available Version": "1.15+", "Checkpoint File": "TAD-BERT-AGNews10KBAE.zip", "Author": "H, Yang (yangheng@m.scnu.edu.cn)"}, "tad-bert-agnews10kpwws": {"id": "", "Training Model": "TAD", "Training Dataset": "AGNews", "Language": "English", "Description": "Trained on RTX3090", "Available Version": "1.15+", "Checkpoint File": "TAD-BERT-AGNews10KPWWS.zip", "Author": "H, Yang (yangheng@m.scnu.edu.cn)"}, "tad-bert-agnews10ktextfooler": {"id": "", "Training Model": "TAD", "Training Dataset": "AGNews", "Language": "English", "Description": "Trained on RTX3090", "Available Version": "1.15+", "Checkpoint File": "TAD-BERT-AGNews10KTextFooler.zip", "Author": "H, Yang (yangheng@m.scnu.edu.cn)"}, "tad-amazon": {"id": "", "Training Model": "TAD", "Training Dataset": "AGNews", "Language": "English", "Description": "Trained on RTX3090", "Available Version": "1.15+", "Checkpoint File": "TAD-Amazon.zip", "Author": "H, Yang (yangheng@m.scnu.edu.cn)"}, "tad-amazonbae": {"id": "", "Training Model": "TAD", "Training Dataset": "AGNews", "Language": "English", "Description": "Trained on RTX3090", "Available Version": "1.15+", "Checkpoint File": "TAD-AmazonBAE.zip", "Author": "H, Yang (yangheng@m.scnu.edu.cn)"}, "tad-amazonpwws": {"id": "", "Training Model": "TAD", "Training Dataset": "AGNews", "Language": "English", "Description": "Trained on RTX3090", "Available Version": "1.15+", "Checkpoint File": "TAD-AmazonPWWS.zip", "Author": "H, Yang (yangheng@m.scnu.edu.cn)"}, "tad-amazontextfooler": {"id": "", "Training Model": "TAD", "Training Dataset": "AGNews", "Language": "English", "Description": "Trained on RTX3090", "Available Version": "1.15+", "Checkpoint File": "TAD-AmazonTextFooler.zip", "Author": "H, Yang (yangheng@m.scnu.edu.cn)"}, "tad-bert-amazon": {"id": "", "Training Model": "TAD", "Training Dataset": "AGNews", "Language": "English", "Description": "Trained on RTX3090", "Available Version": "1.15+", "Checkpoint File": "TAD-BERT-Amazon.zip", "Author": "H, Yang (yangheng@m.scnu.edu.cn)"}, "tad-bert-amazonbae": {"id": "", "Training Model": "TAD", "Training Dataset": "AGNews", "Language": "English", "Description": "Trained on RTX3090", "Available Version": "1.15+", "Checkpoint File": "TAD-BERT-AmazonBAE.zip", "Author": "H, Yang (yangheng@m.scnu.edu.cn)"}, "tad-bert-amazonpwws": {"id": "", "Training Model": "TAD", "Training Dataset": "AGNews", "Language": "English", "Description": "Trained on RTX3090", "Available Version": "1.15+", "Checkpoint File": "TAD-BERT-AmazonPWWS.zip", "Author": "H, Yang (yangheng@m.scnu.edu.cn)"}, "tad-bert-amazontextfooler": {"id": "", "Training Model": "TAD", "Training Dataset": "AGNews", "Language": "English", "Description": "Trained on RTX3090", "Available Version": "1.15+", "Checkpoint File": "TAD-BERT-AmazonTextFooler.zip", "Author": "H, Yang (yangheng@m.scnu.edu.cn)"}}}, "1.0.0-1.15.7": {"APC": {"english": {"id": "https://drive.google.com/file/d/1JIFhaAdoCeZI5CQqfOix3pnrYa6_Mf9S/view?usp=sharing", "Training Model": "FAST-LSA-S", "Training Dataset": "English", "Language": "English", "Description": "Trained on RTX3090", "Available Version": "1.6.3+", "Checkpoint File": "fast_lsa_s_acc_84.9_f1_82.11.zip", "Author": "H, Yang (yangheng@m.scnu.edu.cn)"}, "chinese": {"id": "https://drive.google.com/file/d/1B0RHazOCm2eOWLWExQkeapHr9d3OiZl7/view?usp=sharing", "Training Model": 
"FAST-LCF-MDeBERTa", "Training Dataset": "Chinese", "Language": "Chinese", "Description": "Trained on RTX3090", "Available Version": "1.8.2+", "Checkpoint File": "fast_lcf_bert_Chinese_acc_97.11_f1_96.54.zip", "Author": "H, Yang (yangheng@m.scnu.edu.cn)"}, "multilingual": {"id": "https://drive.google.com/file/d/15ls1hcGvk27UnfMsXZtk780h7tTBbNws/view?usp=sharing", "Training Model": "FAST-LCF-Deberta", "Training Dataset": "Multilingual", "Language": "Multilingual", "Description": "Trained on RTX3090", "Available Version": "1.8.2+", "Checkpoint File": "fast_lcf_bert_Multilingual_acc_94.72_f1_90.07.zip", "Author": "H, Yang (yangheng@m.scnu.edu.cn)"}, "multilingual2": {"id": "https://drive.google.com/file/d/1YCYKpEnff-DBUHd-8vWmgsnng5ZT_2cs/view?usp=sharing", "Training Model": "FAST-LCF-Deberta", "Training Dataset": "Multilingual", "Language": "Multilingual", "Description": "Trained on RTX3090", "Available Version": "1.10.5+", "Checkpoint File": "fast_lsa_t_v2_Multilingual_acc_88.44_f1_82.66.zip", "Author": "H, Yang (yangheng@m.scnu.edu.cn)"}, "dlcf-dca-bert1": {"id": "https://drive.google.com/file/d/1w-NtWujPglsvZu4-jC6Vmu8Iz8CvX-1u/view?usp=sharing", "Training Model": "dlcf-dca-bert", "Training Dataset": "Laptop 14", "Language": "English", "Description": "Trained on T4", "Available Version": "1.1.17-1.1.23", "Author": "M, Xu (xumayi@m.scnu.edu.cn)"}, "dlcf-dca-bert2": {"id": "https://drive.google.com/file/d/1w-NtWujPglsvZu4-jC6Vmu8Iz8CvX-1u/view?usp=sharing", "Training Model": "dlcf-dca-bert", "Training Dataset": "Restaurant 14", "Language": "English", "Description": "Trained on T4", "Available Version": "1.1.17-1.1.23", "Author": "M, Xu (xumayi@m.scnu.edu.cn)"}, "HELP-WANTED": {"id": "", "Description": "You can help us by sharing checkpoints (e.g. models trained on you own datasets) with community.", "Checkpoint File": "PLEASE NOTE THAT THIS IS NOT A REAL CHECKPOINT!", "Available Version": ""}}, "ATEPC": {"english": {"id": "https://drive.google.com/file/d/1_oBCLi_bjs4CxmEXfVIw8qZCmbJvr-PE/view?usp=sharing", "Training Model": "FAST-LCFS-ATEPC", "Training Dataset": "English", "Language": "English", "Description": "Trained on RTX3090, this checkpoint use bert-spc in ATEPC training", "Available Version": "1.8.4-1.15.7", "Checkpoint File": "fast_lcf_atepc_English_cdw_apcacc_85.03_apcf1_82.76_atef1_84.8.zip", "Author": "H, Yang (yangheng@m.scnu.edu.cn)"}, "chinese": {"id": "https://drive.google.com/file/d/1wHlEeKbQg51LEgr-J353HQhyPgPDEMrp/view?usp=sharing", "Training Model": "FAST-LCF-ATEPC", "Training Dataset": "Chinese", "Language": "Chinese", "Description": "Trained on RTX3090 BERT-BASE-CHINESE", "Available Version": "1.8.4-1.15.7", "Checkpoint File": "fast_lcf_atepc_Chinese_cdw_apcacc_96.0_apcf1_94.96_atef1_91.34.zip", "Author": "H, Yang (yangheng@m.scnu.edu.cn)"}, "multilingual": {"id": "https://drive.google.com/file/d/17r-dMGCUBKnWSQ-djN3MegASuJ7WLAwl/view?usp=sharing", "Training Model": "FAST-LCF-ATEPC", "Training Dataset": "ABSADatasets.Multilingual", "Language": "Multilingual", "Description": "Trained on RTX3090", "Available Version": "1.10.5-1.15.7", "Checkpoint File": "fast_lcf_atepc_Multilingual_cdw_apcacc_87.21_apcf1_81.53_atef1_82.82.zip", "Author": "H, Yang (yangheng@m.scnu.edu.cn)"}, "multilingual-1.14.7": {"id": "https://drive.google.com/file/d/1gZRkcpSIqGfDacdTV4suqgRIfCbWgiHp/view?usp=sharing", "Training Model": "FAST-LCF-ATEPC", "Training Dataset": "ABSADatasets.Multilingual", "Language": "Multilingual", "Description": "Trained on RTX3090", "Available Version": "1.10.5-1.15.7", 
"Checkpoint File": "fast_lcf_atepc_Multilingual_cdw_apcacc_87.21_apcf1_81.53_atef1_82.82.zip", "Author": "H, Yang (yangheng@m.scnu.edu.cn)"}, "multilingual4": {"id": "https://drive.google.com/file/d/1MAnpTiCHyjUDazel2kgHgaKXaDeE0NYW/view?usp=sharing", "Training Model": "FAST-LCF-ATEPC", "Training Dataset": "ABSADatasets.Multilingual", "Language": "Multilingual", "Description": "Trained on RTX3090", "Available Version": "1.10.5-1.15.7", "Checkpoint File": "fast_lcf_atepc_Multilingual_cdw_apcacc_88.96_apcf1_81.58_atef1_81.92.zip", "Author": "H, Yang (yangheng@m.scnu.edu.cn)"}, "HELP-WANTED": {"id": "", "Description": "You can help us by sharing checkpoints (e.g. models trained on you own datasets) with community.", "Checkpoint File": "PLEASE NOTE THAT THIS IS NOT A REAL CHECKPOINT!", "Available Version": ""}}, "TC": {"tc-sst2": {"id": "https://drive.google.com/file/d/1KgEB7MJ8bjrBiYdbtiojreeYegnUtq51/view?usp=sharing", "Training Model": "TAD", "Training Dataset": "SST2", "Language": "English", "Description": "Trained on RTX3090", "Available Version": "1.15+", "Checkpoint File": "TC-SST2.zip", "Author": "H, Yang (yangheng@m.scnu.edu.cn)"}, "tc-agnews10k": {"id": "https://drive.google.com/file/d/1JiRL19maSBiu9_1VBpP1gMwN8qkZsAtC/view?usp=sharing", "Training Model": "TAD", "Training Dataset": "AGNews", "Language": "English", "Description": "Trained on RTX3090", "Available Version": "1.15+", "Checkpoint File": "TC-AGNews10K.zip", "Author": "H, Yang (yangheng@m.scnu.edu.cn)"}, "tc-imdb10k": {"id": "https://drive.google.com/file/d/1TD0xyKKEfs_S0Ze0EmEb16Eq5pdukjN_/view?usp=sharing", "Training Model": "TAD", "Training Dataset": "IMDB10K", "Language": "English", "Description": "Trained on RTX3090", "Available Version": "1.15+", "Checkpoint File": "TC-IMDB10K.zip", "Author": "H, Yang (yangheng@m.scnu.edu.cn)"}, "HELP-WANTED": {"id": "", "Description": "You can help us by sharing checkpoints (e.g. 
models trained on you own datasets) with community.", "Checkpoint File": "PLEASE NOTE THAT THIS IS NOT A REAL CHECKPOINT!", "Available Version": ""}}, "TAD": {"tad-sst2": {"id": "", "Training Model": "TAD", "Training Dataset": "SST2", "Language": "English", "Description": "Trained on RTX3090", "Available Version": "1.15+", "Checkpoint File": "TAD-SST2.zip", "Author": "H, Yang (yangheng@m.scnu.edu.cn)"}, "tad-sst2bae": {"id": "", "Training Model": "TAD", "Training Dataset": "SST2", "Language": "English", "Description": "Trained on RTX3090", "Available Version": "1.15+", "Checkpoint File": "TAD-SST2BAE.zip", "Author": "H, Yang (yangheng@m.scnu.edu.cn)"}, "tad-sst2pwws": {"id": "", "Training Model": "TAD", "Training Dataset": "SST2", "Language": "English", "Description": "Trained on RTX3090", "Available Version": "1.15+", "Checkpoint File": "TAD-SST2PWWS.zip", "Author": "H, Yang (yangheng@m.scnu.edu.cn)"}, "tad-sst2textfooler": {"id": "", "Training Model": "TAD", "Training Dataset": "SST2", "Language": "English", "Description": "Trained on RTX3090", "Available Version": "1.15+", "Checkpoint File": "TAD-SST2TextFooler.zip", "Author": "H, Yang (yangheng@m.scnu.edu.cn)"}, "tad-bert-sst2": {"id": "", "Training Model": "TAD", "Training Dataset": "SST2", "Language": "English", "Description": "Trained on RTX3090", "Available Version": "1.15+", "Checkpoint File": "TAD-BERT-SST2.zip", "Author": "H, Yang (yangheng@m.scnu.edu.cn)"}, "tad-bert-sst2bae": {"id": "", "Training Model": "TAD", "Training Dataset": "SST2", "Language": "English", "Description": "Trained on RTX3090", "Available Version": "1.15+", "Checkpoint File": "TAD-BERT-SST2BAE.zip", "Author": "H, Yang (yangheng@m.scnu.edu.cn)"}, "tad-bert-sst2pwws": {"id": "", "Training Model": "TAD", "Training Dataset": "SST2", "Language": "English", "Description": "Trained on RTX3090", "Available Version": "1.15+", "Checkpoint File": "TAD-BERT-SST2PWWS.zip", "Author": "H, Yang (yangheng@m.scnu.edu.cn)"}, "tad-bert-sst2textfooler": {"id": "", "Training Model": "TAD", "Training Dataset": "SST2", "Language": "English", "Description": "Trained on RTX3090", "Available Version": "1.15+", "Checkpoint File": "TAD-BERT-SST2TextFooler.zip", "Author": "H, Yang (yangheng@m.scnu.edu.cn)"}, "tad-agnews10k": {"id": "", "Training Model": "TAD", "Training Dataset": "AGNews", "Language": "English", "Description": "Trained on RTX3090", "Available Version": "1.15+", "Checkpoint File": "TAD-AGNews10K.zip", "Author": "H, Yang (yangheng@m.scnu.edu.cn)"}, "tad-agnews10kbae": {"id": "", "Training Model": "TAD", "Training Dataset": "AGNews", "Language": "English", "Description": "Trained on RTX3090", "Available Version": "1.15+", "Checkpoint File": "TAD-AGNews10KBAE.zip", "Author": "H, Yang (yangheng@m.scnu.edu.cn)"}, "tad-agnews10kpwws": {"id": "", "Training Model": "TAD", "Training Dataset": "AGNews", "Language": "English", "Description": "Trained on RTX3090", "Available Version": "1.15+", "Checkpoint File": "TAD-AGNews10KPWWS.zip", "Author": "H, Yang (yangheng@m.scnu.edu.cn)"}, "tad-agnews10ktextfooler": {"id": "", "Training Model": "TAD", "Training Dataset": "AGNews", "Language": "English", "Description": "Trained on RTX3090", "Available Version": "1.15+", "Checkpoint File": "TAD-AGNews10KTextFooler.zip", "Author": "H, Yang (yangheng@m.scnu.edu.cn)"}, "tad-bert-agnews10k": {"id": "", "Training Model": "TAD", "Training Dataset": "AGNews", "Language": "English", "Description": "Trained on RTX3090", "Available Version": "1.15+", "Checkpoint File": "TAD-BERT-AGNews10K.zip", 
"Author": "H, Yang (yangheng@m.scnu.edu.cn)"}, "tad-bert-agnews10kbae": {"id": "", "Training Model": "TAD", "Training Dataset": "AGNews", "Language": "English", "Description": "Trained on RTX3090", "Available Version": "1.15+", "Checkpoint File": "TAD-BERT-AGNews10KBAE.zip", "Author": "H, Yang (yangheng@m.scnu.edu.cn)"}, "tad-bert-agnews10kpwws": {"id": "", "Training Model": "TAD", "Training Dataset": "AGNews", "Language": "English", "Description": "Trained on RTX3090", "Available Version": "1.15+", "Checkpoint File": "TAD-BERT-AGNews10KPWWS.zip", "Author": "H, Yang (yangheng@m.scnu.edu.cn)"}, "tad-bert-agnews10ktextfooler": {"id": "", "Training Model": "TAD", "Training Dataset": "AGNews", "Language": "English", "Description": "Trained on RTX3090", "Available Version": "1.15+", "Checkpoint File": "TAD-BERT-AGNews10KTextFooler.zip", "Author": "H, Yang (yangheng@m.scnu.edu.cn)"}, "tad-amazon": {"id": "", "Training Model": "TAD", "Training Dataset": "AGNews", "Language": "English", "Description": "Trained on RTX3090", "Available Version": "1.15+", "Checkpoint File": "TAD-Amazon.zip", "Author": "H, Yang (yangheng@m.scnu.edu.cn)"}, "tad-amazonbae": {"id": "", "Training Model": "TAD", "Training Dataset": "AGNews", "Language": "English", "Description": "Trained on RTX3090", "Available Version": "1.15+", "Checkpoint File": "TAD-AmazonBAE.zip", "Author": "H, Yang (yangheng@m.scnu.edu.cn)"}, "tad-amazonpwws": {"id": "", "Training Model": "TAD", "Training Dataset": "AGNews", "Language": "English", "Description": "Trained on RTX3090", "Available Version": "1.15+", "Checkpoint File": "TAD-AmazonPWWS.zip", "Author": "H, Yang (yangheng@m.scnu.edu.cn)"}, "tad-amazontextfooler": {"id": "", "Training Model": "TAD", "Training Dataset": "AGNews", "Language": "English", "Description": "Trained on RTX3090", "Available Version": "1.15+", "Checkpoint File": "TAD-AmazonTextFooler.zip", "Author": "H, Yang (yangheng@m.scnu.edu.cn)"}, "tad-bert-amazon": {"id": "", "Training Model": "TAD", "Training Dataset": "AGNews", "Language": "English", "Description": "Trained on RTX3090", "Available Version": "1.15+", "Checkpoint File": "TAD-BERT-Amazon.zip", "Author": "H, Yang (yangheng@m.scnu.edu.cn)"}, "tad-bert-amazonbae": {"id": "", "Training Model": "TAD", "Training Dataset": "AGNews", "Language": "English", "Description": "Trained on RTX3090", "Available Version": "1.15+", "Checkpoint File": "TAD-BERT-AmazonBAE.zip", "Author": "H, Yang (yangheng@m.scnu.edu.cn)"}, "tad-bert-amazonpwws": {"id": "", "Training Model": "TAD", "Training Dataset": "AGNews", "Language": "English", "Description": "Trained on RTX3090", "Available Version": "1.15+", "Checkpoint File": "TAD-BERT-AmazonPWWS.zip", "Author": "H, Yang (yangheng@m.scnu.edu.cn)"}, "tad-bert-amazontextfooler": {"id": "", "Training Model": "TAD", "Training Dataset": "AGNews", "Language": "English", "Description": "Trained on RTX3090", "Available Version": "1.15+", "Checkpoint File": "TAD-BERT-AmazonTextFooler.zip", "Author": "H, Yang (yangheng@m.scnu.edu.cn)"}}}} \ No newline at end of file diff --git a/examples/reactive_defense/sst2_reactive_defense.py b/examples/reactive_defense/sst2_reactive_defense.py new file mode 100644 index 000000000..108e9a957 --- /dev/null +++ b/examples/reactive_defense/sst2_reactive_defense.py @@ -0,0 +1,136 @@ +import os + +from textattack import Attacker +from textattack.attack_recipes import (PWWSRen2019, + BAEGarg2019, + TextFoolerJin2019, + BERTAttackLi2020, + GeneticAlgorithmAlzantot2018, + CLARE2020, + FasterGeneticAlgorithmJia2019, + 
DeepWordBugGao2018, + PSOZang2020) +from textattack.datasets import HuggingFaceDataset +from textattack.models.wrappers import TADModelWrapper +from textattack.reactive_defense.tad_reactive_defender import TADReactiveDefender + +dataset = HuggingFaceDataset("glue", subset='sst2', split="validation") + +# init reactive_defender to post fix attacked result +reactive_defender = TADReactiveDefender('taddeberta-sst2') + +# use the based tad_classifier (without defense) to test +target_model = reactive_defender.tad_classifier + +model_wrapper = TADModelWrapper(target_model) + +# recipe = PWWSRen2019.build(model_wrapper) +# +-------------------------------+--------+ +# | Attack Results | | +# +-------------------------------+--------+ +# | Number of successful attacks: | 31 | +# | Number of failed attacks: | 801 | +# | Number of skipped attacks: | 40 | +# | Original accuracy: | 95.41% | +# | Accuracy under attack: | 91.86% | +# | Attack success rate: | 3.73% | +# | Average perturbed word %: | 7.06% | +# | Average num. words per input: | 17.4 | +# | Avg num queries: | 144.1 | +# +-------------------------------+--------+ + +# recipe = BAEGarg2019.build(model_wrapper) +# +-------------------------------+--------+ +# | Attack Results | | +# +-------------------------------+--------+ +# | Number of successful attacks: | 18 | +# | Number of failed attacks: | 79 | +# | Number of skipped attacks: | 3 | +# | Original accuracy: | 97.0% | +# | Accuracy under attack: | 79.0% | +# | Attack success rate: | 18.56% | +# | Average perturbed word %: | 12.8% | +# | Average num. words per input: | 16.92 | +# | Avg num queries: | 55.45 | +# +-------------------------------+--------+ + +# recipe = TextFoolerJin2019.build(model_wrapper) +# #+-------------------------------+--------+ +# | Attack Results | | +# +-------------------------------+--------+ +# | Number of successful attacks: | 6 | +# | Number of failed attacks: | 91 | +# | Number of skipped attacks: | 3 | +# | Original accuracy: | 97.0% | +# | Accuracy under attack: | 91.0% | +# | Attack success rate: | 6.19% | +# | Average perturbed word %: | 16.28% | +# | Average num. words per input: | 16.92 | +# | Avg num queries: | 124.56 | +# +-------------------------------+--------+ + +recipe = GeneticAlgorithmAlzantot2018.build(model_wrapper) +# +# recipe = BERTAttackLi2020.build(model_wrapper) +# +# recipe = FasterGeneticAlgorithmJia2019.build(model_wrapper) +# +# recipe = DeepWordBugGao2018.build(model_wrapper) +# +-------------------------------+--------+ +# | Attack Results | | +# +-------------------------------+--------+ +# | Number of successful attacks: | 14 | +# | Number of failed attacks: | 83 | +# | Number of skipped attacks: | 3 | +# | Original accuracy: | 97.0% | +# | Accuracy under attack: | 83.0% | +# | Attack success rate: | 14.43% | +# | Average perturbed word %: | 24.32% | +# | Average num. words per input: | 16.92 | +# | Avg num queries: | 33.13 | +# +-------------------------------+--------+ + +# recipe = CLARE2020.build(model_wrapper) +# +-------------------------------+---------+ +# | Attack Results | | +# +-------------------------------+---------+ +# | Number of successful attacks: | 50 | +# | Number of failed attacks: | 47 | +# | Number of skipped attacks: | 3 | +# | Original accuracy: | 97.0% | +# | Accuracy under attack: | 47.0% | +# | Attack success rate: | 51.55% | +# | Average perturbed word %: | 30.37% | +# | Average num. 
words per input: | 16.92 | +# | Avg num queries: | 1771.19 | +# +-------------------------------+---------+ + +# recipe = PSOZang2020.build(model_wrapper) +# +-------------------------------+---------+ +# | Attack Results | | +# +-------------------------------+---------+ +# | Number of successful attacks: | 10 | +# | Number of failed attacks: | 87 | +# | Number of skipped attacks: | 3 | +# | Original accuracy: | 97.0% | +# | Accuracy under attack: | 87.0% | +# | Attack success rate: | 10.31% | +# | Average perturbed word %: | 18.98% | +# | Average num. words per input: | 16.92 | +# | Avg num queries: | 6497.32 | +# +-------------------------------+---------+ + + +attacker = Attacker(recipe, dataset) + +# install pyabsa for this example +attacker.attack_args.num_examples = 100 +# results = attacker.attack_dataset() +results = attacker.attack_dataset(reactive_defender=reactive_defender) + +# Online Reactive Adversarial Defense: +# https://huggingface.co/spaces/yangheng/TAD + +# Ref repo: +# https://github.com/yangheng95/TextAttack +# https://github.com/yangheng95/PyABSA diff --git a/requirements.txt b/requirements.txt index 4befebef6..61f0870e5 100644 --- a/requirements.txt +++ b/requirements.txt @@ -23,3 +23,4 @@ jieba OpenHowNet pycld2 click<8.1.0 +pyabsa>=1.16.16 diff --git a/textattack/attacker.py b/textattack/attacker.py index 96a9e21cb..c49806f44 100644 --- a/textattack/attacker.py +++ b/textattack/attacker.py @@ -102,7 +102,35 @@ def _get_worklist(self, start, end, num_examples, shuffle): assert (len(worklist) + len(candidates)) == (end - start) return worklist, candidates - def _attack(self): + def simple_attack(self, text, label): + """Internal method that carries out attack. + + No parallel processing is involved. + """ + if torch.cuda.is_available(): + self.attack.cuda_() + + example, ground_truth_output = text, label + try: + example = textattack.shared.AttackedText(example) + if self.dataset.label_names is not None: + example.attack_attrs["label_names"] = self.dataset.label_names + try: + result = self.attack.attack(example, ground_truth_output) + except Exception as e: + raise e + # return + if (isinstance(result, SkippedAttackResult) and self.attack_args.attack_n) or ( + not isinstance(result, SuccessfulAttackResult) + and self.attack_args.num_successful_examples + ): + return + else: + return result + except KeyboardInterrupt as e: + raise e + + def _attack(self, **kwargs): """Internal method that carries out attack. No parallel processing is involved. @@ -165,7 +193,7 @@ def _attack(self): if self.dataset.label_names is not None: example.attack_attrs["label_names"] = self.dataset.label_names try: - result = self.attack.attack(example, ground_truth_output) + result = self.attack.attack(example, ground_truth_output, **kwargs) except Exception as e: raise e if ( @@ -402,7 +430,7 @@ def _attack_parallel(self): self.attack_log_manager.flush() print() - def attack_dataset(self): + def attack_dataset(self, **kwargs): """Attack the dataset. 
Returns: @@ -438,7 +466,7 @@ def attack_dataset(self): ) self._attack_parallel() else: - self._attack() + self._attack(**kwargs) if self.attack_args.silent: logger.setLevel(logging.INFO) diff --git a/textattack/constraints/semantics/sentence_encoders/universal_sentence_encoder/__init__.py b/textattack/constraints/semantics/sentence_encoders/universal_sentence_encoder/__init__.py index 2e3b5f10b..4967402ca 100644 --- a/textattack/constraints/semantics/sentence_encoders/universal_sentence_encoder/__init__.py +++ b/textattack/constraints/semantics/sentence_encoders/universal_sentence_encoder/__init__.py @@ -3,7 +3,6 @@ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ """ - from .universal_sentence_encoder import UniversalSentenceEncoder from .multilingual_universal_sentence_encoder import ( MultilingualUniversalSentenceEncoder, diff --git a/textattack/constraints/semantics/sentence_encoders/universal_sentence_encoder/multilingual_universal_sentence_encoder.py b/textattack/constraints/semantics/sentence_encoders/universal_sentence_encoder/multilingual_universal_sentence_encoder.py index 578e9b892..20381e2bc 100644 --- a/textattack/constraints/semantics/sentence_encoders/universal_sentence_encoder/multilingual_universal_sentence_encoder.py +++ b/textattack/constraints/semantics/sentence_encoders/universal_sentence_encoder/multilingual_universal_sentence_encoder.py @@ -20,14 +20,22 @@ def __init__(self, threshold=0.8, large=False, metric="angular", **kwargs): tensorflow_text._load() if large: tfhub_url = "https://tfhub.dev/google/universal-sentence-encoder-multilingual-large/3" + mirror_tfhub_url = "https://hub.tensorflow.google.cn/google/universal-sentence-encoder-multilingual-large/3" else: tfhub_url = ( - "https://tfhub.dev/google/universal-sentence-encoder-multilingual/3" + "https://https://tfhub.dev/google/universal-sentence-encoder-multilingual/3" + ) + mirror_tfhub_url = ( + "https://hub.tensorflow.google.cn/google/universal-sentence-encoder-multilingual/3" ) - # TODO add QA SET. Details at: https://tfhub.dev/google/universal-sentence-encoder-multilingual-qa/3 + # TODO add QA SET. 
Details at: https://hub.tensorflow.google.cn/google/universal-sentence-encoder-multilingual-qa/3 self._tfhub_url = tfhub_url - self.model = hub.load(tfhub_url) + self.mirror_tfhub_url = mirror_tfhub_url + try: + self.model = hub.load(self._tfhub_url) + except: + self.model = hub.load(self.mirror_tfhub_url) def encode(self, sentences): return self.model(sentences).numpy() @@ -39,4 +47,7 @@ def __getstate__(self): def __setstate__(self, state): self.__dict__ = state - self.model = hub.load(self._tfhub_url) + try: + self.model = hub.load(self._tfhub_url) + except: + self.model = hub.load(self.mirror_tfhub_url) diff --git a/textattack/constraints/semantics/sentence_encoders/universal_sentence_encoder/universal_sentence_encoder.py b/textattack/constraints/semantics/sentence_encoders/universal_sentence_encoder/universal_sentence_encoder.py index c4017a5fa..1088eb003 100644 --- a/textattack/constraints/semantics/sentence_encoders/universal_sentence_encoder/universal_sentence_encoder.py +++ b/textattack/constraints/semantics/sentence_encoders/universal_sentence_encoder/universal_sentence_encoder.py @@ -18,22 +18,23 @@ def __init__(self, threshold=0.8, large=False, metric="angular", **kwargs): super().__init__(threshold=threshold, metric=metric, **kwargs) if large: tfhub_url = "https://tfhub.dev/google/universal-sentence-encoder-large/5" + mirror_tfhub_url = "https://hub.tensorflow.google.cn/google/universal-sentence-encoder-large/5" else: - tfhub_url = "https://tfhub.dev/google/universal-sentence-encoder/3" + tfhub_url = "https://tfhub.dev/google/universal-sentence-encoder/4" + mirror_tfhub_url = "https://hub.tensorflow.google.cn/google/universal-sentence-encoder/4" self._tfhub_url = tfhub_url + self.mirror_tfhub_url = mirror_tfhub_url # Lazily load the model self.model = None def encode(self, sentences): if not self.model: - self.model = hub.load(self._tfhub_url) - encoding = self.model(sentences) - - if isinstance(encoding, dict): - encoding = encoding["outputs"] - - return encoding.numpy() + try: + self.model = hub.load(self._tfhub_url) + except: + self.model = hub.load(self.mirror_tfhub_url) + return self.model(sentences).numpy() def __getstate__(self): state = self.__dict__.copy() @@ -42,4 +43,7 @@ def __getstate__(self): def __setstate__(self, state): self.__dict__ = state - self.model = None + try: + self.model = hub.load(self._tfhub_url) + except: + self.model = hub.load(self.mirror_tfhub_url) diff --git a/textattack/dataset_args.py b/textattack/dataset_args.py index f3d50abf7..b34ea3e68 100644 --- a/textattack/dataset_args.py +++ b/textattack/dataset_args.py @@ -118,11 +118,6 @@ ), "xlnet-base-cased-wnli": ("glue", "wnli", "validation"), } - - -# -# Models hosted by textattack. 
-# TEXTATTACK_DATASET_BY_MODEL = { # # LSTMs @@ -162,6 +157,13 @@ # T5 for summarization # "t5-summarization": ("gigaword", None, "test"), + # + # PyABSA defender + # + "tadbert-ag-news": ("ag_news", None, "test"), + "taddeberta-ag-news": ("ag_news", None, "test"), + "tadbert-sst2": ("glue", "sst2", "validation"), + "taddeberta-sst2": ("glue", "sst2", "validation"), } diff --git a/textattack/model_args.py b/textattack/model_args.py index 73c133cff..e9607cb4c 100644 --- a/textattack/model_args.py +++ b/textattack/model_args.py @@ -93,6 +93,15 @@ "xlnet-base-cased-wnli": "textattack/xlnet-base-cased-WNLI", } +PYABSA_MODELS = { + # + # PyABSA defense models + # + "tadbert-sst2": "tad-bert-sst2", + "taddeberta-sst2": "tad-sst2", + "tadbert-ag-news": "tad-agnews10k", + "taddeberta-ag-news": "tad-bert-agnews10k", +} # # Models hosted by textattack. @@ -143,7 +152,7 @@ def _add_parser_args(cls, parser): """Adds model-related arguments to an argparser.""" model_group = parser.add_mutually_exclusive_group() - model_names = list(HUGGINGFACE_MODELS.keys()) + list(TEXTATTACK_MODELS.keys()) + model_names = list(HUGGINGFACE_MODELS.keys()) + list(TEXTATTACK_MODELS.keys()) + list(PYABSA_MODELS.keys()) model_group.add_argument( "--model", type=str, @@ -226,6 +235,18 @@ def _create_model_from_args(cls, args): model_name, use_fast=True ) model = textattack.models.wrappers.HuggingFaceModelWrapper(model, tokenizer) + elif args.model in PYABSA_MODELS: + from pyabsa import TADCheckpointManager + colored_model_name = textattack.shared.utils.color_text( + args.model, color="blue", method="ansi" + ) + textattack.shared.logger.info( + f"Loading pre-trained TAD model from https://github.com/yangheng95/PyABSA: {colored_model_name}" + ) + model = TADCheckpointManager.get_tad_text_classifier(checkpoint=PYABSA_MODELS[args.model], auto_device=True) + model = textattack.models.wrappers.TADModelWrapper( + model + ) elif args.model in TEXTATTACK_MODELS: # Support loading TextAttack pre-trained models via just a keyword. colored_model_name = textattack.shared.utils.color_text( diff --git a/textattack/models/wrappers/__init__.py b/textattack/models/wrappers/__init__.py index b1c96a861..4ae40e94e 100644 --- a/textattack/models/wrappers/__init__.py +++ b/textattack/models/wrappers/__init__.py @@ -14,3 +14,4 @@ from .pytorch_model_wrapper import PyTorchModelWrapper from .sklearn_model_wrapper import SklearnModelWrapper from .tensorflow_model_wrapper import TensorFlowModelWrapper +from .pyabsa_model_wrapper import TADModelWrapper diff --git a/textattack/models/wrappers/pyabsa_model_wrapper.py b/textattack/models/wrappers/pyabsa_model_wrapper.py new file mode 100644 index 000000000..a62b4adcd --- /dev/null +++ b/textattack/models/wrappers/pyabsa_model_wrapper.py @@ -0,0 +1,30 @@ +# -*- coding: utf-8 -*- +# file: pyabsa_model_wrapper.py +# time: 22/06/2022 +# author: yangheng +# github: https://github.com/yangheng95 +# Copyright (C) 2021. All Rights Reserved. 
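A minimal usage sketch for the wrapper this file introduces, assuming pyabsa>=1.16.16 is installed and the "tad-sst2" checkpoint can be downloaded; the checkpoint key mirrors the patch's PYABSA_MODELS mapping, while the sample text and example probabilities are only illustrative:

from pyabsa import TADCheckpointManager
from textattack.models.wrappers import TADModelWrapper

# Load a PyABSA TAD text classifier (the checkpoint is fetched on first use).
tad_classifier = TADCheckpointManager.get_tad_text_classifier(
    checkpoint="tad-sst2", auto_device=True
)
# Wrap it so TextAttack receives one probability list per input text,
# e.g. something like [[0.02, 0.98]] for a single positive SST-2 review.
model_wrapper = TADModelWrapper(tad_classifier)
probs = model_wrapper(["a gripping and well acted thriller"])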
+from textattack.models.wrappers import HuggingFaceModelWrapper + + +class TADModelWrapper(HuggingFaceModelWrapper): + """ Transformers sentiment analysis pipeline returns a list of responses + like + + [{'label': 'POSITIVE', 'score': 0.7817379832267761}] + + We need to convert that to a format TextAttack understands, like + + [[0.218262017, 0.7817379832267761] + """ + + def __init__(self, model): + self.model = model # pipeline = pipeline + + def __call__(self, text_inputs, **kwargs): + outputs = [] + for text_input in text_inputs: + raw_outputs = self.model.infer(text_input, print_result=False, **kwargs) + outputs.append(raw_outputs['probs']) + + return outputs diff --git a/textattack/reactive_defense/reactive_defender.py b/textattack/reactive_defense/reactive_defender.py new file mode 100644 index 000000000..a96b6c692 --- /dev/null +++ b/textattack/reactive_defense/reactive_defender.py @@ -0,0 +1,20 @@ +# -*- coding: utf-8 -*- +# file: reactive_defense.py +# time: 2022/8/20 +# author: yangheng +# github: https://github.com/yangheng95 +# huggingface: https://huggingface.co/yangheng +# google scholar: https://scholar.google.com/citations?user=NPq5a_0AAAAJ&hl=en +# Copyright (C) 2021. All Rights Reserved. +from abc import ABC, abstractmethod + +from textattack.shared.utils import ReprMixin + + +class ReactiveDefender(ReprMixin, ABC): + + def __init__(self, **kwargs): + pass + + def reactive_defense(self, **kwargs): + pass diff --git a/textattack/reactive_defense/tad_reactive_defender.py b/textattack/reactive_defense/tad_reactive_defender.py new file mode 100644 index 000000000..f3e678981 --- /dev/null +++ b/textattack/reactive_defense/tad_reactive_defender.py @@ -0,0 +1,33 @@ +# -*- coding: utf-8 -*- +# file: tad_defense.py +# time: 2022/8/20 +# author: yangheng +# github: https://github.com/yangheng95 +# huggingface: https://huggingface.co/yangheng +# google scholar: https://scholar.google.com/citations?user=NPq5a_0AAAAJ&hl=en +# Copyright (C) 2021. All Rights Reserved. 
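A sketch of how the defender defined below is driven, assuming the "taddeberta-sst2" key resolves to a downloadable PyABSA checkpoint (as registered in PYABSA_MODELS); the sample text is illustrative:

from textattack.reactive_defense.tad_reactive_defender import TADReactiveDefender

# The defender owns a TAD classifier. Attacker.attack_dataset(reactive_defender=...)
# can hand it each attacked example for post-hoc repair, and reactive_defense() can
# also be called directly on a single (possibly adversarial) text.
defender = TADReactiveDefender(ckpt="taddeberta-sst2")
result = defender.reactive_defense("an attacked review whose wording reads slightly off")
# `result` is the raw PyABSA inference output, produced with defense='pwws'.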
+from pyabsa import TADCheckpointManager + +from textattack.model_args import PYABSA_MODELS +from textattack.reactive_defense.reactive_defender import ReactiveDefender + + +class TADReactiveDefender(ReactiveDefender): + """ Transformers sentiment analysis pipeline returns a list of responses + like + + [{'label': 'POSITIVE', 'score': 0.7817379832267761}] + + We need to convert that to a format TextAttack understands, like + + [[0.218262017, 0.7817379832267761] + """ + + def __init__(self, ckpt='tad-sst2', **kwargs): + super().__init__(**kwargs) + self.tad_classifier = TADCheckpointManager.get_tad_text_classifier(checkpoint=PYABSA_MODELS[ckpt], + auto_device=True) + + def reactive_defense(self, text, **kwargs): + res = self.tad_classifier.infer(text, defense='pwws', print_result=False, **kwargs) + return res diff --git a/textattack/trainer.py b/textattack/trainer.py index 2b389b74d..77b47c6cd 100644 --- a/textattack/trainer.py +++ b/textattack/trainer.py @@ -89,13 +89,13 @@ class Trainer: """ def __init__( - self, - model_wrapper, - task_type="classification", - attack=None, - train_dataset=None, - eval_dataset=None, - training_args=None, + self, + model_wrapper, + task_type="classification", + attack=None, + train_dataset=None, + eval_dataset=None, + training_args=None, ): assert isinstance( model_wrapper, ModelWrapper @@ -164,7 +164,7 @@ def __init__( def _generate_adversarial_examples(self, epoch): """Generate adversarial examples using attacker.""" assert ( - self.attack is not None + self.attack is not None ), "`attack` is `None` but attempting to generate adversarial examples." base_file_name = f"attack-train-{epoch}" log_file_name = os.path.join(self.training_args.output_dir, base_file_name) @@ -217,7 +217,7 @@ def _generate_adversarial_examples(self, epoch): attack_types = collections.Counter(r.__class__.__name__ for r in results) total_attacks = ( - attack_types["SuccessfulAttackResult"] + attack_types["FailedAttackResult"] + attack_types["SuccessfulAttackResult"] + attack_types["FailedAttackResult"] ) success_rate = attack_types["SuccessfulAttackResult"] / total_attacks * 100 logger.info(f"Total number of attack results: {len(results)}") @@ -251,7 +251,7 @@ def _generate_adversarial_examples(self, epoch): return adversarial_dataset def _print_training_args( - self, total_training_steps, train_batch_size, num_clean_epochs + self, total_training_steps, train_batch_size, num_clean_epochs ): logger.info("***** Running training *****") logger.info(f" Num examples = {len(self.train_dataset)}") @@ -269,7 +269,7 @@ def _print_training_args( logger.info(f" Total optimization steps = {total_training_steps}") def _save_model_checkpoint( - self, model, tokenizer, step=None, epoch=None, best=False, last=False + self, model, tokenizer, step=None, epoch=None, best=False, last=False ): # Save model checkpoint if step: @@ -398,6 +398,7 @@ def get_train_dataloader(self, dataset, adv_dataset, batch_size): Returns: :obj:`torch.utils.data.DataLoader` """ + # TODO: Add pairing option where we can pair original examples with adversarial examples. 
# Helper functions for collating data def collate_fn(data): @@ -460,6 +461,7 @@ def get_eval_dataloader(self, dataset, batch_size): Returns: :obj:`torch.utils.data.DataLoader` """ + # Helper functions for collating data def collate_fn(data): input_texts = [] @@ -512,8 +514,8 @@ def training_step(self, model, tokenizer, batch): targets = targets.to(textattack.shared.utils.device) if isinstance(model, transformers.PreTrainedModel) or ( - isinstance(model, torch.nn.DataParallel) - and isinstance(model.module, transformers.PreTrainedModel) + isinstance(model, torch.nn.DataParallel) + and isinstance(model.module, transformers.PreTrainedModel) ): input_ids = tokenizer( input_texts, @@ -637,11 +639,11 @@ def train(self): num_clean_epochs = self.training_args.num_clean_epochs total_clean_training_steps = ( - math.ceil( - len(self.train_dataset) - / (train_batch_size * self.training_args.gradient_accumulation_steps) - ) - * num_clean_epochs + math.ceil( + len(self.train_dataset) + / (train_batch_size * self.training_args.gradient_accumulation_steps) + ) + * num_clean_epochs ) # calculate total_adv_training_data_length based on type of @@ -649,13 +651,13 @@ def train(self): # if num_train_adv_examples is float , num_train_adv_examples is a portion of train_dataset. if isinstance(self.training_args.num_train_adv_examples, float): total_adv_training_data_length = ( - len(self.train_dataset) * self.training_args.num_train_adv_examples + len(self.train_dataset) * self.training_args.num_train_adv_examples ) # if num_train_adv_examples is int and >=0 then it is taken as value. elif ( - isinstance(self.training_args.num_train_adv_examples, int) - and self.training_args.num_train_adv_examples >= 0 + isinstance(self.training_args.num_train_adv_examples, int) + and self.training_args.num_train_adv_examples >= 0 ): total_adv_training_data_length = self.training_args.num_train_adv_examples @@ -700,7 +702,7 @@ def train(self): if self.attack and epoch > num_clean_epochs: if ( - epoch - num_clean_epochs - 1 + epoch - num_clean_epochs - 1 ) % self.training_args.attack_epoch_interval == 0: # only generate a new adversarial training set every self.training_args.attack_period epochs after the clean epochs # adv_dataset is instance of `textattack.datasets.Dataset` @@ -752,12 +754,12 @@ def train(self): if self._global_step > 0: prog_bar.set_description( - f"Loss {self._total_loss/self._global_step:.5f}" + f"Loss {self._total_loss / self._global_step:.5f}" ) # TODO: Better way to handle TB and Wandb logging if (self._global_step > 0) and ( - self._global_step % self.training_args.logging_interval_step == 0 + self._global_step % self.training_args.logging_interval_step == 0 ): lr_to_log = ( scheduler.get_last_lr()[0] @@ -786,12 +788,12 @@ def train(self): # Save model checkpoint to file. 
if self.training_args.checkpoint_interval_steps: if ( - self._global_step > 0 - and ( + self._global_step > 0 + and ( self._global_step % self.training_args.checkpoint_interval_steps - ) - == 0 + ) + == 0 ): self._save_model_checkpoint( model, tokenizer, step=self._global_step @@ -803,7 +805,7 @@ def train(self): correct_predictions = (preds == targets).sum().item() accuracy = correct_predictions / len(targets) metric_log = {"train/train_accuracy": accuracy} - logger.info(f"Train accuracy: {accuracy*100:.2f}%") + logger.info(f"Train accuracy: {accuracy * 100:.2f}%") else: pearson_correlation, pearson_pvalue = scipy.stats.pearsonr( preds, targets @@ -833,8 +835,8 @@ def train(self): ) if ( - self.training_args.checkpoint_interval_epochs - and (epoch % self.training_args.checkpoint_interval_epochs) == 0 + self.training_args.checkpoint_interval_epochs + and (epoch % self.training_args.checkpoint_interval_epochs) == 0 ): self._save_model_checkpoint(model, tokenizer, epoch=epoch) @@ -849,8 +851,8 @@ def train(self): else: epochs_since_best_eval_score += 1 if self.training_args.early_stopping_epochs and ( - epochs_since_best_eval_score - > self.training_args.early_stopping_epochs + epochs_since_best_eval_score + > self.training_args.early_stopping_epochs ): logger.info( f"Stopping early since it's been {self.training_args.early_stopping_epochs} steps since validation score increased." @@ -919,7 +921,7 @@ def evaluate(self): eval_score = accuracy if self._metric_name == "accuracy": - logger.info(f"Eval {self._metric_name}: {eval_score*100:.2f}%") + logger.info(f"Eval {self._metric_name}: {eval_score * 100:.2f}%") else: logger.info(f"Eval {self._metric_name}: {eval_score:.4f}%") @@ -930,8 +932,8 @@ def _write_readme(self, best_eval_score, best_eval_score_epoch, train_batch_size model_name = self.training_args.model_name_or_path elif isinstance(self.model_wrapper.model, transformers.PreTrainedModel): if ( - hasattr(self.model_wrapper.model.config, "_name_or_path") - and self.model_wrapper.model.config._name_or_path in HUGGINGFACE_MODELS + hasattr(self.model_wrapper.model.config, "_name_or_path") + and self.model_wrapper.model.config._name_or_path in HUGGINGFACE_MODELS ): # TODO Better way than just checking HUGGINGFACE_MODELS ? 
model_name = self.model_wrapper.model.config._name_or_path @@ -946,17 +948,17 @@ def _write_readme(self, best_eval_score, best_eval_score_epoch, train_batch_size model_name = f"`{model_name}`" if ( - isinstance(self.training_args, CommandLineTrainingArgs) - and self.training_args.model_max_length + isinstance(self.training_args, CommandLineTrainingArgs) + and self.training_args.model_max_length ): model_max_length = self.training_args.model_max_length elif isinstance( - self.model_wrapper.model, - ( - transformers.PreTrainedModel, - LSTMForClassification, - WordCNNForClassification, - ), + self.model_wrapper.model, + ( + transformers.PreTrainedModel, + LSTMForClassification, + WordCNNForClassification, + ), ): model_max_length = self.model_wrapper.tokenizer.model_max_length else: @@ -968,13 +970,13 @@ def _write_readme(self, best_eval_score, best_eval_score_epoch, train_batch_size model_max_length_str = "" if isinstance( - self.train_dataset, textattack.datasets.HuggingFaceDataset + self.train_dataset, textattack.datasets.HuggingFaceDataset ) and hasattr(self.train_dataset, "_name"): dataset_name = self.train_dataset._name if hasattr(self.train_dataset, "_subset"): dataset_name += f" ({self.train_dataset._subset})" elif isinstance( - self.eval_dataset, textattack.datasets.HuggingFaceDataset + self.eval_dataset, textattack.datasets.HuggingFaceDataset ) and hasattr(self.eval_dataset, "_name"): dataset_name = self.eval_dataset._name if hasattr(self.eval_dataset, "_subset"): From f8026700fc4ef0892a04ae2d33a86d05f300ba3c Mon Sep 17 00:00:00 2001 From: yangheng95 Date: Thu, 29 Sep 2022 16:34:08 +0100 Subject: [PATCH 02/12] update_adversarial_defense --- docs/2notebook/Example_2_allennlp.ipynb | 6250 +++++++++-------- .../reactive_defense/sst2_reactive_defense.py | 7 +- textattack/attack.py | 8 +- textattack/dataset_args.py | 9 + .../reactive_defense/reactive_defender.py | 6 +- .../reactive_defense/tad_reactive_defender.py | 2 +- textattack/search_methods/search_method.py | 14 +- 7 files changed, 3170 insertions(+), 3126 deletions(-) diff --git a/docs/2notebook/Example_2_allennlp.ipynb b/docs/2notebook/Example_2_allennlp.ipynb index 928c7dd3d..87c1bd76b 100644 --- a/docs/2notebook/Example_2_allennlp.ipynb +++ b/docs/2notebook/Example_2_allennlp.ipynb @@ -1,3144 +1,3162 @@ { - "nbformat": 4, - "nbformat_minor": 0, - "metadata": { - "accelerator": "GPU", + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "accelerator": "GPU", + "colab": { + "name": "[TextAttack] Model Example: AllenNLP", + "provenance": [], + "collapsed_sections": [] + }, + "kernelspec": { + "name": "python379jvsc74a57bd00aa23297d40f12761ebb1c384bf2965d5ecbdef2f9c005ee7346b9ec0bcc5588", + "display_name": "Python 3.7.9 64-bit ('pytorch-gpu': pyenv)" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.8" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "6b448a4eedc844ef840ca70aa997d02b": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "HBoxView", + "_dom_classes": [], + "_model_name": "HBoxModel", + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.5.0", + "box_style": "", + "layout": "IPY_MODEL_bd686416d53a4d88b3ae1e357c4f0e71", + 
"_model_module": "@jupyter-widgets/controls", + "children": [ + "IPY_MODEL_3b3da3896eca40caac9561b1979c90ba", + "IPY_MODEL_47c06887d2aa477a820737eda5fb3ad4", + "IPY_MODEL_aaed99b5432b47508d5090a8df7c24bc" + ] + } + }, + "bd686416d53a4d88b3ae1e357c4f0e71": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "3b3da3896eca40caac9561b1979c90ba": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "HTMLView", + "style": "IPY_MODEL_f59ffe8c7da14da08c861235cf2d9ea7", + "_dom_classes": [], + "description": "", + "_model_name": "HTMLModel", + "placeholder": "​", + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "value": "Downloading: ", + "_view_count": null, + "_view_module_version": "1.5.0", + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + "layout": "IPY_MODEL_869e01668ff342178f40f385a0bc3366" + } + }, + "47c06887d2aa477a820737eda5fb3ad4": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "ProgressView", + "style": "IPY_MODEL_266f90eabfea46e1ae5ee4bc22f711ee", + "_dom_classes": [], + "description": "", + "_model_name": "FloatProgressModel", + "bar_style": "success", + "max": 7777, + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "value": 7777, + "_view_count": null, + "_view_module_version": "1.5.0", + "orientation": "horizontal", + "min": 0, + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + "layout": "IPY_MODEL_dff48b2efd70497ba4b28ca6bd1499d9" + } + }, + "aaed99b5432b47508d5090a8df7c24bc": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "HTMLView", + "style": "IPY_MODEL_6439f6674b484b14b4e9bf21497efc56", + "_dom_classes": [], + "description": "", + "_model_name": "HTMLModel", + "placeholder": "​", + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "value": " 28.8k/? 
[00:00<00:00, 609kB/s]", + "_view_count": null, + "_view_module_version": "1.5.0", + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + "layout": "IPY_MODEL_45c77d8e79d14fffab433e78b86048ce" + } + }, + "f59ffe8c7da14da08c861235cf2d9ea7": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "StyleView", + "_model_name": "DescriptionStyleModel", + "description_width": "", + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + "_model_module": "@jupyter-widgets/controls" + } + }, + "869e01668ff342178f40f385a0bc3366": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "266f90eabfea46e1ae5ee4bc22f711ee": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "StyleView", + "_model_name": "ProgressStyleModel", + "description_width": "", + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + "bar_color": null, + "_model_module": "@jupyter-widgets/controls" + } + }, + "dff48b2efd70497ba4b28ca6bd1499d9": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + 
"object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "6439f6674b484b14b4e9bf21497efc56": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "StyleView", + "_model_name": "DescriptionStyleModel", + "description_width": "", + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + "_model_module": "@jupyter-widgets/controls" + } + }, + "45c77d8e79d14fffab433e78b86048ce": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "2cc82d8fd98749e7b160ac4dae04c9d8": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "HBoxView", + "_dom_classes": [], + "_model_name": "HBoxModel", + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.5.0", + "box_style": "", + "layout": "IPY_MODEL_60b5c7c86aa94936b06981c65b9db3e8", + "_model_module": "@jupyter-widgets/controls", + "children": [ + "IPY_MODEL_d26fbb35af5f45d7ad75977ea9c5ffad", + "IPY_MODEL_9e0287b81c6f45a386858de9c8e8735e", + "IPY_MODEL_b60a716e37964537b122ce1116e002d0" + ] + } + }, + "60b5c7c86aa94936b06981c65b9db3e8": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + 
"_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "d26fbb35af5f45d7ad75977ea9c5ffad": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "HTMLView", + "style": "IPY_MODEL_516f9277541e4c199a2fa125a75f8bdb", + "_dom_classes": [], + "description": "", + "_model_name": "HTMLModel", + "placeholder": "​", + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "value": "Downloading: ", + "_view_count": null, + "_view_module_version": "1.5.0", + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + "layout": "IPY_MODEL_6f0f652f722f4827ba1eab9fb081d8d2" + } + }, + "9e0287b81c6f45a386858de9c8e8735e": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "ProgressView", + "style": "IPY_MODEL_fd5086beccb6431fa907d90d7168f79f", + "_dom_classes": [], + "description": "", + "_model_name": "FloatProgressModel", + "bar_style": "success", + "max": 4473, + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "value": 4473, + "_view_count": null, + "_view_module_version": "1.5.0", + "orientation": "horizontal", + "min": 0, + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + "layout": "IPY_MODEL_daa2c6454a704b84bd7e2525a52dba0c" + } + }, + "b60a716e37964537b122ce1116e002d0": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "HTMLView", + "style": "IPY_MODEL_f7f132d7b56b4bb9b950b09ad27ca115", + "_dom_classes": [], + "description": "", + "_model_name": "HTMLModel", + "placeholder": "​", + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "value": " 28.7k/? 
[00:00<00:00, 652kB/s]", + "_view_count": null, + "_view_module_version": "1.5.0", + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + "layout": "IPY_MODEL_fa983f4a063b4a83856b5d219e3ed04b" + } + }, + "516f9277541e4c199a2fa125a75f8bdb": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "StyleView", + "_model_name": "DescriptionStyleModel", + "description_width": "", + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + "_model_module": "@jupyter-widgets/controls" + } + }, + "6f0f652f722f4827ba1eab9fb081d8d2": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "fd5086beccb6431fa907d90d7168f79f": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "StyleView", + "_model_name": "ProgressStyleModel", + "description_width": "", + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + "bar_color": null, + "_model_module": "@jupyter-widgets/controls" + } + }, + "daa2c6454a704b84bd7e2525a52dba0c": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + 
"object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "f7f132d7b56b4bb9b950b09ad27ca115": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "StyleView", + "_model_name": "DescriptionStyleModel", + "description_width": "", + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + "_model_module": "@jupyter-widgets/controls" + } + }, + "fa983f4a063b4a83856b5d219e3ed04b": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "af00f37ede9e45f0a59fdf54711cf985": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "HBoxView", + "_dom_classes": [], + "_model_name": "HBoxModel", + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.5.0", + "box_style": "", + "layout": "IPY_MODEL_d88766c2c3bc4e7e83530b7ae6647ffd", + "_model_module": "@jupyter-widgets/controls", + "children": [ + "IPY_MODEL_38c0d380e8ac4290b6d82979d0bb131a", + "IPY_MODEL_c93c7594ee084a4283144933bfcafefd", + "IPY_MODEL_0150a0c8ed674464874ba83453e0ddbd" + ] + } + }, + "d88766c2c3bc4e7e83530b7ae6647ffd": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + 
"_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "38c0d380e8ac4290b6d82979d0bb131a": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "HTMLView", + "style": "IPY_MODEL_0c8e9d22a63644cd88dd5aa7ba08a21f", + "_dom_classes": [], + "description": "", + "_model_name": "HTMLModel", + "placeholder": "​", + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "value": "Downloading: 100%", + "_view_count": null, + "_view_module_version": "1.5.0", + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + "layout": "IPY_MODEL_603c23100ac54d01ada1ebbba7bb5fc0" + } + }, + "c93c7594ee084a4283144933bfcafefd": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "ProgressView", + "style": "IPY_MODEL_25f91f41a1de48498ebd248a3cce85a8", + "_dom_classes": [], + "description": "", + "_model_name": "FloatProgressModel", + "bar_style": "success", + "max": 7439277, + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "value": 7439277, + "_view_count": null, + "_view_module_version": "1.5.0", + "orientation": "horizontal", + "min": 0, + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + "layout": "IPY_MODEL_d1b6bdc47c544e84ae3ba3b584c7afa3" + } + }, + "0150a0c8ed674464874ba83453e0ddbd": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "HTMLView", + "style": "IPY_MODEL_138cf20c691d4e9784adefea3ceecd1d", + "_dom_classes": [], + "description": "", + "_model_name": "HTMLModel", + "placeholder": "​", + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "value": " 7.44M/7.44M [00:00<00:00, 13.6MB/s]", + "_view_count": null, + "_view_module_version": "1.5.0", + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + "layout": "IPY_MODEL_5d02bf542c7c4c289722e03e56f5d56c" + } + }, + "0c8e9d22a63644cd88dd5aa7ba08a21f": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "StyleView", + "_model_name": "DescriptionStyleModel", + "description_width": "", + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + "_model_module": "@jupyter-widgets/controls" + } + }, + "603c23100ac54d01ada1ebbba7bb5fc0": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, 
+ "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "25f91f41a1de48498ebd248a3cce85a8": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "StyleView", + "_model_name": "ProgressStyleModel", + "description_width": "", + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + "bar_color": null, + "_model_module": "@jupyter-widgets/controls" + } + }, + "d1b6bdc47c544e84ae3ba3b584c7afa3": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "138cf20c691d4e9784adefea3ceecd1d": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "StyleView", + "_model_name": "DescriptionStyleModel", + "description_width": "", + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + "_model_module": "@jupyter-widgets/controls" + } + }, + "5d02bf542c7c4c289722e03e56f5d56c": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + 
"min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "424af94826664dc1a8b38f252c4e047f": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "HBoxView", + "_dom_classes": [], + "_model_name": "HBoxModel", + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.5.0", + "box_style": "", + "layout": "IPY_MODEL_6e779ad14425452aa70f0efbf40f99b4", + "_model_module": "@jupyter-widgets/controls", + "children": [ + "IPY_MODEL_264ab6ca60db4c29ad45830ab9de40ef", + "IPY_MODEL_627d891d68474869a38e1801afe63b89", + "IPY_MODEL_48ac05c8ff36473e8896be8a47d876a4" + ] + } + }, + "6e779ad14425452aa70f0efbf40f99b4": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "264ab6ca60db4c29ad45830ab9de40ef": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "HTMLView", + "style": "IPY_MODEL_eef891ab6af04cd8ad39e50109c15bba", + "_dom_classes": [], + "description": "", + "_model_name": "HTMLModel", + "placeholder": "​", + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "value": "", + "_view_count": null, + "_view_module_version": "1.5.0", + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + "layout": "IPY_MODEL_743a844b407e41e7b9a84cc5feb1b7d0" + } + }, + "627d891d68474869a38e1801afe63b89": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "ProgressView", + "style": "IPY_MODEL_d2c4e06c58174175baa80d3c316dcc09", + "_dom_classes": [], + "description": "", + "_model_name": "FloatProgressModel", + "bar_style": "info", + "max": 1, + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "value": 1, + "_view_count": null, + "_view_module_version": "1.5.0", + "orientation": "horizontal", + "min": 0, + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", 
+ "layout": "IPY_MODEL_5bd2eabf4cd343cf8a6056e8535d3150" + } + }, + "48ac05c8ff36473e8896be8a47d876a4": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "HTMLView", + "style": "IPY_MODEL_2accb80f9e0440328199a278739c2d67", + "_dom_classes": [], + "description": "", + "_model_name": "HTMLModel", + "placeholder": "​", + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "value": " 66044/0 [00:02<00:00, 23358.81 examples/s]", + "_view_count": null, + "_view_module_version": "1.5.0", + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + "layout": "IPY_MODEL_cb1c32ecba014b84bf5832fff6732526" + } + }, + "eef891ab6af04cd8ad39e50109c15bba": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "StyleView", + "_model_name": "DescriptionStyleModel", + "description_width": "", + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + "_model_module": "@jupyter-widgets/controls" + } + }, + "743a844b407e41e7b9a84cc5feb1b7d0": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "d2c4e06c58174175baa80d3c316dcc09": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "StyleView", + "_model_name": "ProgressStyleModel", + "description_width": "", + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + "bar_color": null, + "_model_module": "@jupyter-widgets/controls" + } + }, + "5bd2eabf4cd343cf8a6056e8535d3150": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": "20px", + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": 
null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "2accb80f9e0440328199a278739c2d67": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "StyleView", + "_model_name": "DescriptionStyleModel", + "description_width": "", + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + "_model_module": "@jupyter-widgets/controls" + } + }, + "cb1c32ecba014b84bf5832fff6732526": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "f6ad5b1ec3f64ddbbf0d3bdb6d567658": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "HBoxView", + "_dom_classes": [], + "_model_name": "HBoxModel", + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.5.0", + "box_style": "", + "layout": "IPY_MODEL_af907b5540244cd4a38d9deeccbba57a", + "_model_module": "@jupyter-widgets/controls", + "children": [ + "IPY_MODEL_0a273d61ed62463daee40739cd52ae28", + "IPY_MODEL_f1c36b2e6651488b900ea7659a10ff4c", + "IPY_MODEL_519fb7f7926c4a31a561153deec61bc1" + ] + } + }, + "af907b5540244cd4a38d9deeccbba57a": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + 
"_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "0a273d61ed62463daee40739cd52ae28": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "HTMLView", + "style": "IPY_MODEL_5e009457ac3d4ef5a3b3fb6560b3c80f", + "_dom_classes": [], + "description": "", + "_model_name": "HTMLModel", + "placeholder": "​", + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "value": "", + "_view_count": null, + "_view_module_version": "1.5.0", + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + "layout": "IPY_MODEL_d8a9cfa29033467c8201007c05897627" + } + }, + "f1c36b2e6651488b900ea7659a10ff4c": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "ProgressView", + "style": "IPY_MODEL_b8640741b3404eb2956a5b60a377db06", + "_dom_classes": [], + "description": "", + "_model_name": "FloatProgressModel", + "bar_style": "info", + "max": 1, + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "value": 1, + "_view_count": null, + "_view_module_version": "1.5.0", + "orientation": "horizontal", + "min": 0, + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + "layout": "IPY_MODEL_810727ad014a42aa8788a03578b8ee52" + } + }, + "519fb7f7926c4a31a561153deec61bc1": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "HTMLView", + "style": "IPY_MODEL_2c2f360da4a64a0b8f21a28774ede852", + "_dom_classes": [], + "description": "", + "_model_name": "HTMLModel", + "placeholder": "​", + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "value": " 525/0 [00:00<00:00, 5205.72 examples/s]", + "_view_count": null, + "_view_module_version": "1.5.0", + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + "layout": "IPY_MODEL_36a2c00fefb64582b09eca3c02a33956" + } + }, + "5e009457ac3d4ef5a3b3fb6560b3c80f": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "StyleView", + "_model_name": "DescriptionStyleModel", + "description_width": "", + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + "_model_module": "@jupyter-widgets/controls" + } + }, + "d8a9cfa29033467c8201007c05897627": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": 
"@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "b8640741b3404eb2956a5b60a377db06": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "StyleView", + "_model_name": "ProgressStyleModel", + "description_width": "", + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + "bar_color": null, + "_model_module": "@jupyter-widgets/controls" + } + }, + "810727ad014a42aa8788a03578b8ee52": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": "20px", + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "2c2f360da4a64a0b8f21a28774ede852": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "StyleView", + "_model_name": "DescriptionStyleModel", + "description_width": "", + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + "_model_module": "@jupyter-widgets/controls" + } + }, + "36a2c00fefb64582b09eca3c02a33956": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + 
"flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "2a0bd608a44944fda41042d07d54b076": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "HBoxView", + "_dom_classes": [], + "_model_name": "HBoxModel", + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.5.0", + "box_style": "", + "layout": "IPY_MODEL_6f5f6167aba247458fa8371416cd27d1", + "_model_module": "@jupyter-widgets/controls", + "children": [ + "IPY_MODEL_182aa21c1cab4699b21c89babf8b92ab", + "IPY_MODEL_37816bf4761c448c9bde942c8a7e4c7e", + "IPY_MODEL_c921586e8995495f8c8313da78382ff7" + ] + } + }, + "6f5f6167aba247458fa8371416cd27d1": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "182aa21c1cab4699b21c89babf8b92ab": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "HTMLView", + "style": "IPY_MODEL_fcca6c8857ea4edfbda08ed390747ad8", + "_dom_classes": [], + "description": "", + "_model_name": "HTMLModel", + "placeholder": "​", + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "value": "", + "_view_count": null, + "_view_module_version": "1.5.0", + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + "layout": "IPY_MODEL_83b50dba703b4eb6a3f488083a341dbc" + } + }, + "37816bf4761c448c9bde942c8a7e4c7e": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + 
"model_module_version": "1.5.0", + "state": { + "_view_name": "ProgressView", + "style": "IPY_MODEL_530f8a34716e4790b161f578ca592602", + "_dom_classes": [], + "description": "", + "_model_name": "FloatProgressModel", + "bar_style": "info", + "max": 1, + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "value": 1, + "_view_count": null, + "_view_module_version": "1.5.0", + "orientation": "horizontal", + "min": 0, + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + "layout": "IPY_MODEL_ecd1834382af47f48ccaed3d3e13b348" + } + }, + "c921586e8995495f8c8313da78382ff7": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "HTMLView", + "style": "IPY_MODEL_4632d5700b7a4180b4ebef6ed36019c1", + "_dom_classes": [], + "description": "", + "_model_name": "HTMLModel", + "placeholder": "​", + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "value": " 362/0 [00:00<00:00, 3492.25 examples/s]", + "_view_count": null, + "_view_module_version": "1.5.0", + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + "layout": "IPY_MODEL_40f8c3c973034a7288156a727d84e1fc" + } + }, + "fcca6c8857ea4edfbda08ed390747ad8": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "StyleView", + "_model_name": "DescriptionStyleModel", + "description_width": "", + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + "_model_module": "@jupyter-widgets/controls" + } + }, + "83b50dba703b4eb6a3f488083a341dbc": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "530f8a34716e4790b161f578ca592602": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "StyleView", + "_model_name": "ProgressStyleModel", + "description_width": "", + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + "bar_color": null, + "_model_module": "@jupyter-widgets/controls" + } + }, + "ecd1834382af47f48ccaed3d3e13b348": { + "model_module": 
"@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": "20px", + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "4632d5700b7a4180b4ebef6ed36019c1": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "StyleView", + "_model_name": "DescriptionStyleModel", + "description_width": "", + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + "_model_module": "@jupyter-widgets/controls" + } + }, + "40f8c3c973034a7288156a727d84e1fc": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "73a5417a077f4f7e82e7f11d7f4fefba": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "HBoxView", + "_dom_classes": [], + "_model_name": "HBoxModel", + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.5.0", + "box_style": "", + "layout": "IPY_MODEL_787e669ab19f4b3694b7560dd9012b68", + "_model_module": "@jupyter-widgets/controls", + "children": [ + "IPY_MODEL_3221c018b6604cada04f2710fd00e750", + "IPY_MODEL_068d9c59920d48b188b7e52c9117b6e6", + 
"IPY_MODEL_bf95592be6084cb782f11e2957120215" + ] + } + }, + "787e669ab19f4b3694b7560dd9012b68": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "3221c018b6604cada04f2710fd00e750": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "HTMLView", + "style": "IPY_MODEL_660a5285054b439496a555cdfef285b8", + "_dom_classes": [], + "description": "", + "_model_name": "HTMLModel", + "placeholder": "​", + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "value": "100%", + "_view_count": null, + "_view_module_version": "1.5.0", + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + "layout": "IPY_MODEL_1aa35c99719544d995629b2c797b6813" + } + }, + "068d9c59920d48b188b7e52c9117b6e6": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "ProgressView", + "style": "IPY_MODEL_da770238262140a881a3ba9f7c9a6187", + "_dom_classes": [], + "description": "", + "_model_name": "FloatProgressModel", + "bar_style": "success", + "max": 3, + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "value": 3, + "_view_count": null, + "_view_module_version": "1.5.0", + "orientation": "horizontal", + "min": 0, + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + "layout": "IPY_MODEL_7a55e9a929c7489fbed6a3fd970c0621" + } + }, + "bf95592be6084cb782f11e2957120215": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "HTMLView", + "style": "IPY_MODEL_d5e2ca12c98b4cb1b7e7f869ee1e549d", + "_dom_classes": [], + "description": "", + "_model_name": "HTMLModel", + "placeholder": "​", + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "value": " 3/3 [00:00<00:00, 62.18it/s]", + "_view_count": null, + "_view_module_version": "1.5.0", + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + "layout": "IPY_MODEL_57c72b3beb6c422690e0be7ba8c583c5" + } + }, + "660a5285054b439496a555cdfef285b8": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "StyleView", 
+ "_model_name": "DescriptionStyleModel", + "description_width": "", + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + "_model_module": "@jupyter-widgets/controls" + } + }, + "1aa35c99719544d995629b2c797b6813": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "da770238262140a881a3ba9f7c9a6187": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "StyleView", + "_model_name": "ProgressStyleModel", + "description_width": "", + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + "bar_color": null, + "_model_module": "@jupyter-widgets/controls" + } + }, + "7a55e9a929c7489fbed6a3fd970c0621": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "d5e2ca12c98b4cb1b7e7f869ee1e549d": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "StyleView", + "_model_name": "DescriptionStyleModel", + "description_width": "", + "_view_module": "@jupyter-widgets/base", + 
"_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + "_model_module": "@jupyter-widgets/controls" + } + }, + "57c72b3beb6c422690e0be7ba8c583c5": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + } + } + } + }, + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "JPVBc5ndpFIX", + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "# TextAttack & AllenNLP \n", + "\n", + "This is an example of testing adversarial attacks from TextAttack on pretrained models provided by AllenNLP. \n", + "\n", + "In a few lines of code, we load a sentiment analysis model trained on the Stanford Sentiment Treebank and configure it with a TextAttack model wrapper. 
Then, we initialize the TextBugger attack and run the attack on a few samples from the SST-2 train set.\n", + "\n", + "For more information on AllenNLP pre-trained models: https://docs.allennlp.org/models/main/\n", + "\n", + "For more information about the TextBugger attack: https://arxiv.org/abs/1812.05271" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "AyPMGcz0qLfK", + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/QData/TextAttack/blob/master/docs/2notebook/Example_2_allennlp.ipynb)\n", + "\n", + "[![View Source on GitHub](https://img.shields.io/badge/github-view%20source-black.svg)](https://github.com/QData/TextAttack/blob/master/docs/2notebook/Example_2_allennlp.ipynb)" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "My9oy5iBSKfb", + "pycharm": { + "name": "#%%\n" + } + }, + "source": [ + "!pip install allennlp allennlp_models > /dev/null" + ], + "execution_count": 4, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "z8wAb0BcSg8W", + "outputId": "8cc26ced-6f03-433c-97d2-72037c606fde", "colab": { - "name": "[TextAttack] Model Example: AllenNLP", - "provenance": [], - "collapsed_sections": [] - }, - "kernelspec": { - "name": "python379jvsc74a57bd00aa23297d40f12761ebb1c384bf2965d5ecbdef2f9c005ee7346b9ec0bcc5588", - "display_name": "Python 3.7.9 64-bit ('pytorch-gpu': pyenv)" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.8" - }, - "widgets": { - "application/vnd.jupyter.widget-state+json": { - "6b448a4eedc844ef840ca70aa997d02b": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HBoxModel", - "model_module_version": "1.5.0", - "state": { - "_view_name": "HBoxView", - "_dom_classes": [], - "_model_name": "HBoxModel", - "_view_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_view_count": null, - "_view_module_version": "1.5.0", - "box_style": "", - "layout": "IPY_MODEL_bd686416d53a4d88b3ae1e357c4f0e71", - "_model_module": "@jupyter-widgets/controls", - "children": [ - "IPY_MODEL_3b3da3896eca40caac9561b1979c90ba", - "IPY_MODEL_47c06887d2aa477a820737eda5fb3ad4", - "IPY_MODEL_aaed99b5432b47508d5090a8df7c24bc" - ] - } - }, - "bd686416d53a4d88b3ae1e357c4f0e71": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_view_name": "LayoutView", - "grid_template_rows": null, - "right": null, - "justify_content": null, - "_view_module": "@jupyter-widgets/base", - "overflow": null, - "_model_module_version": "1.2.0", - "_view_count": null, - "flex_flow": null, - "width": null, - "min_width": null, - "border": null, - "align_items": null, - "bottom": null, - "_model_module": "@jupyter-widgets/base", - "top": null, - "grid_column": null, - "overflow_y": null, - "overflow_x": null, - "grid_auto_flow": null, - "grid_area": null, - "grid_template_columns": null, - "flex": null, - "_model_name": "LayoutModel", - "justify_items": null, - "grid_row": null, - "max_height": null, - "align_content": null, - "visibility": null, - "align_self": null, - "height": null, - "min_height": null, - "padding": null, - "grid_auto_rows": null, - "grid_gap": null, - "max_width": null, - "order": null, - "_view_module_version": 
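Editor's note: the markdown cell above outlines the whole pipeline the notebook walks through (load an AllenNLP sentiment predictor, wrap it for TextAttack, build the TextBugger recipe, and attack a few SST-2 training samples). The snippet below is a minimal sketch of that pipeline using the public TextAttack and allennlp_models APIs; the `load_predictor("glue-sst")` model id, the wrapper class name, and the `probs` output key are illustrative assumptions and are not taken from this patch or from the notebook's actual cells.

    # Sketch only: assumes allennlp_models exposes an SST predictor under the
    # "glue-sst" id (hypothetical) that returns a "probs" field per prediction.
    import numpy as np
    from allennlp_models.pretrained import load_predictor

    import textattack
    from textattack.models.wrappers import ModelWrapper
    from textattack.attack_recipes import TextBuggerLi2018
    from textattack.datasets import HuggingFaceDataset


    class AllenNLPSentimentWrapper(ModelWrapper):
        """Adapts an AllenNLP sentiment predictor to TextAttack's wrapper interface."""

        def __init__(self, predictor):
            self.model = predictor

        def __call__(self, text_input_list):
            # TextAttack expects one row of class probabilities per input text.
            outputs = [self.model.predict(sentence=text) for text in text_input_list]
            return np.array([out["probs"] for out in outputs])


    predictor = load_predictor("glue-sst")  # hypothetical pretrained-model id
    model_wrapper = AllenNLPSentimentWrapper(predictor)

    # Build the TextBugger recipe and attack a handful of SST-2 training samples.
    attack = TextBuggerLi2018.build(model_wrapper)
    dataset = HuggingFaceDataset("glue", "sst2", split="train")
    attack_args = textattack.AttackArgs(num_examples=10)
    textattack.Attacker(attack, dataset, attack_args).attack_dataset()

The wrapper only needs to return class probabilities for a batch of raw strings, which is why a black-box recipe such as TextBugger can drive an AllenNLP predictor without any gradient access.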
"1.2.0", - "grid_template_areas": null, - "object_position": null, - "object_fit": null, - "grid_auto_columns": null, - "margin": null, - "display": null, - "left": null - } - }, - "3b3da3896eca40caac9561b1979c90ba": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_view_name": "HTMLView", - "style": "IPY_MODEL_f59ffe8c7da14da08c861235cf2d9ea7", - "_dom_classes": [], - "description": "", - "_model_name": "HTMLModel", - "placeholder": "​", - "_view_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "value": "Downloading: ", - "_view_count": null, - "_view_module_version": "1.5.0", - "description_tooltip": null, - "_model_module": "@jupyter-widgets/controls", - "layout": "IPY_MODEL_869e01668ff342178f40f385a0bc3366" - } - }, - "47c06887d2aa477a820737eda5fb3ad4": { - "model_module": "@jupyter-widgets/controls", - "model_name": "FloatProgressModel", - "model_module_version": "1.5.0", - "state": { - "_view_name": "ProgressView", - "style": "IPY_MODEL_266f90eabfea46e1ae5ee4bc22f711ee", - "_dom_classes": [], - "description": "", - "_model_name": "FloatProgressModel", - "bar_style": "success", - "max": 7777, - "_view_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "value": 7777, - "_view_count": null, - "_view_module_version": "1.5.0", - "orientation": "horizontal", - "min": 0, - "description_tooltip": null, - "_model_module": "@jupyter-widgets/controls", - "layout": "IPY_MODEL_dff48b2efd70497ba4b28ca6bd1499d9" - } - }, - "aaed99b5432b47508d5090a8df7c24bc": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_view_name": "HTMLView", - "style": "IPY_MODEL_6439f6674b484b14b4e9bf21497efc56", - "_dom_classes": [], - "description": "", - "_model_name": "HTMLModel", - "placeholder": "​", - "_view_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "value": " 28.8k/? 
[00:00<00:00, 609kB/s]", - "_view_count": null, - "_view_module_version": "1.5.0", - "description_tooltip": null, - "_model_module": "@jupyter-widgets/controls", - "layout": "IPY_MODEL_45c77d8e79d14fffab433e78b86048ce" - } - }, - "f59ffe8c7da14da08c861235cf2d9ea7": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_view_name": "StyleView", - "_model_name": "DescriptionStyleModel", - "description_width": "", - "_view_module": "@jupyter-widgets/base", - "_model_module_version": "1.5.0", - "_view_count": null, - "_view_module_version": "1.2.0", - "_model_module": "@jupyter-widgets/controls" - } - }, - "869e01668ff342178f40f385a0bc3366": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_view_name": "LayoutView", - "grid_template_rows": null, - "right": null, - "justify_content": null, - "_view_module": "@jupyter-widgets/base", - "overflow": null, - "_model_module_version": "1.2.0", - "_view_count": null, - "flex_flow": null, - "width": null, - "min_width": null, - "border": null, - "align_items": null, - "bottom": null, - "_model_module": "@jupyter-widgets/base", - "top": null, - "grid_column": null, - "overflow_y": null, - "overflow_x": null, - "grid_auto_flow": null, - "grid_area": null, - "grid_template_columns": null, - "flex": null, - "_model_name": "LayoutModel", - "justify_items": null, - "grid_row": null, - "max_height": null, - "align_content": null, - "visibility": null, - "align_self": null, - "height": null, - "min_height": null, - "padding": null, - "grid_auto_rows": null, - "grid_gap": null, - "max_width": null, - "order": null, - "_view_module_version": "1.2.0", - "grid_template_areas": null, - "object_position": null, - "object_fit": null, - "grid_auto_columns": null, - "margin": null, - "display": null, - "left": null - } - }, - "266f90eabfea46e1ae5ee4bc22f711ee": { - "model_module": "@jupyter-widgets/controls", - "model_name": "ProgressStyleModel", - "model_module_version": "1.5.0", - "state": { - "_view_name": "StyleView", - "_model_name": "ProgressStyleModel", - "description_width": "", - "_view_module": "@jupyter-widgets/base", - "_model_module_version": "1.5.0", - "_view_count": null, - "_view_module_version": "1.2.0", - "bar_color": null, - "_model_module": "@jupyter-widgets/controls" - } - }, - "dff48b2efd70497ba4b28ca6bd1499d9": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_view_name": "LayoutView", - "grid_template_rows": null, - "right": null, - "justify_content": null, - "_view_module": "@jupyter-widgets/base", - "overflow": null, - "_model_module_version": "1.2.0", - "_view_count": null, - "flex_flow": null, - "width": null, - "min_width": null, - "border": null, - "align_items": null, - "bottom": null, - "_model_module": "@jupyter-widgets/base", - "top": null, - "grid_column": null, - "overflow_y": null, - "overflow_x": null, - "grid_auto_flow": null, - "grid_area": null, - "grid_template_columns": null, - "flex": null, - "_model_name": "LayoutModel", - "justify_items": null, - "grid_row": null, - "max_height": null, - "align_content": null, - "visibility": null, - "align_self": null, - "height": null, - "min_height": null, - "padding": null, - "grid_auto_rows": null, - "grid_gap": null, - "max_width": null, - "order": null, - "_view_module_version": "1.2.0", - "grid_template_areas": null, - "object_position": null, - 
"object_fit": null, - "grid_auto_columns": null, - "margin": null, - "display": null, - "left": null - } - }, - "6439f6674b484b14b4e9bf21497efc56": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_view_name": "StyleView", - "_model_name": "DescriptionStyleModel", - "description_width": "", - "_view_module": "@jupyter-widgets/base", - "_model_module_version": "1.5.0", - "_view_count": null, - "_view_module_version": "1.2.0", - "_model_module": "@jupyter-widgets/controls" - } - }, - "45c77d8e79d14fffab433e78b86048ce": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_view_name": "LayoutView", - "grid_template_rows": null, - "right": null, - "justify_content": null, - "_view_module": "@jupyter-widgets/base", - "overflow": null, - "_model_module_version": "1.2.0", - "_view_count": null, - "flex_flow": null, - "width": null, - "min_width": null, - "border": null, - "align_items": null, - "bottom": null, - "_model_module": "@jupyter-widgets/base", - "top": null, - "grid_column": null, - "overflow_y": null, - "overflow_x": null, - "grid_auto_flow": null, - "grid_area": null, - "grid_template_columns": null, - "flex": null, - "_model_name": "LayoutModel", - "justify_items": null, - "grid_row": null, - "max_height": null, - "align_content": null, - "visibility": null, - "align_self": null, - "height": null, - "min_height": null, - "padding": null, - "grid_auto_rows": null, - "grid_gap": null, - "max_width": null, - "order": null, - "_view_module_version": "1.2.0", - "grid_template_areas": null, - "object_position": null, - "object_fit": null, - "grid_auto_columns": null, - "margin": null, - "display": null, - "left": null - } - }, - "2cc82d8fd98749e7b160ac4dae04c9d8": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HBoxModel", - "model_module_version": "1.5.0", - "state": { - "_view_name": "HBoxView", - "_dom_classes": [], - "_model_name": "HBoxModel", - "_view_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_view_count": null, - "_view_module_version": "1.5.0", - "box_style": "", - "layout": "IPY_MODEL_60b5c7c86aa94936b06981c65b9db3e8", - "_model_module": "@jupyter-widgets/controls", - "children": [ - "IPY_MODEL_d26fbb35af5f45d7ad75977ea9c5ffad", - "IPY_MODEL_9e0287b81c6f45a386858de9c8e8735e", - "IPY_MODEL_b60a716e37964537b122ce1116e002d0" - ] - } - }, - "60b5c7c86aa94936b06981c65b9db3e8": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_view_name": "LayoutView", - "grid_template_rows": null, - "right": null, - "justify_content": null, - "_view_module": "@jupyter-widgets/base", - "overflow": null, - "_model_module_version": "1.2.0", - "_view_count": null, - "flex_flow": null, - "width": null, - "min_width": null, - "border": null, - "align_items": null, - "bottom": null, - "_model_module": "@jupyter-widgets/base", - "top": null, - "grid_column": null, - "overflow_y": null, - "overflow_x": null, - "grid_auto_flow": null, - "grid_area": null, - "grid_template_columns": null, - "flex": null, - "_model_name": "LayoutModel", - "justify_items": null, - "grid_row": null, - "max_height": null, - "align_content": null, - "visibility": null, - "align_self": null, - "height": null, - "min_height": null, - "padding": null, - "grid_auto_rows": null, - "grid_gap": null, - "max_width": null, - "order": null, - 
"_view_module_version": "1.2.0", - "grid_template_areas": null, - "object_position": null, - "object_fit": null, - "grid_auto_columns": null, - "margin": null, - "display": null, - "left": null - } - }, - "d26fbb35af5f45d7ad75977ea9c5ffad": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_view_name": "HTMLView", - "style": "IPY_MODEL_516f9277541e4c199a2fa125a75f8bdb", - "_dom_classes": [], - "description": "", - "_model_name": "HTMLModel", - "placeholder": "​", - "_view_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "value": "Downloading: ", - "_view_count": null, - "_view_module_version": "1.5.0", - "description_tooltip": null, - "_model_module": "@jupyter-widgets/controls", - "layout": "IPY_MODEL_6f0f652f722f4827ba1eab9fb081d8d2" - } - }, - "9e0287b81c6f45a386858de9c8e8735e": { - "model_module": "@jupyter-widgets/controls", - "model_name": "FloatProgressModel", - "model_module_version": "1.5.0", - "state": { - "_view_name": "ProgressView", - "style": "IPY_MODEL_fd5086beccb6431fa907d90d7168f79f", - "_dom_classes": [], - "description": "", - "_model_name": "FloatProgressModel", - "bar_style": "success", - "max": 4473, - "_view_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "value": 4473, - "_view_count": null, - "_view_module_version": "1.5.0", - "orientation": "horizontal", - "min": 0, - "description_tooltip": null, - "_model_module": "@jupyter-widgets/controls", - "layout": "IPY_MODEL_daa2c6454a704b84bd7e2525a52dba0c" - } - }, - "b60a716e37964537b122ce1116e002d0": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_view_name": "HTMLView", - "style": "IPY_MODEL_f7f132d7b56b4bb9b950b09ad27ca115", - "_dom_classes": [], - "description": "", - "_model_name": "HTMLModel", - "placeholder": "​", - "_view_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "value": " 28.7k/? 
[00:00<00:00, 652kB/s]", - "_view_count": null, - "_view_module_version": "1.5.0", - "description_tooltip": null, - "_model_module": "@jupyter-widgets/controls", - "layout": "IPY_MODEL_fa983f4a063b4a83856b5d219e3ed04b" - } - }, - "516f9277541e4c199a2fa125a75f8bdb": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_view_name": "StyleView", - "_model_name": "DescriptionStyleModel", - "description_width": "", - "_view_module": "@jupyter-widgets/base", - "_model_module_version": "1.5.0", - "_view_count": null, - "_view_module_version": "1.2.0", - "_model_module": "@jupyter-widgets/controls" - } - }, - "6f0f652f722f4827ba1eab9fb081d8d2": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_view_name": "LayoutView", - "grid_template_rows": null, - "right": null, - "justify_content": null, - "_view_module": "@jupyter-widgets/base", - "overflow": null, - "_model_module_version": "1.2.0", - "_view_count": null, - "flex_flow": null, - "width": null, - "min_width": null, - "border": null, - "align_items": null, - "bottom": null, - "_model_module": "@jupyter-widgets/base", - "top": null, - "grid_column": null, - "overflow_y": null, - "overflow_x": null, - "grid_auto_flow": null, - "grid_area": null, - "grid_template_columns": null, - "flex": null, - "_model_name": "LayoutModel", - "justify_items": null, - "grid_row": null, - "max_height": null, - "align_content": null, - "visibility": null, - "align_self": null, - "height": null, - "min_height": null, - "padding": null, - "grid_auto_rows": null, - "grid_gap": null, - "max_width": null, - "order": null, - "_view_module_version": "1.2.0", - "grid_template_areas": null, - "object_position": null, - "object_fit": null, - "grid_auto_columns": null, - "margin": null, - "display": null, - "left": null - } - }, - "fd5086beccb6431fa907d90d7168f79f": { - "model_module": "@jupyter-widgets/controls", - "model_name": "ProgressStyleModel", - "model_module_version": "1.5.0", - "state": { - "_view_name": "StyleView", - "_model_name": "ProgressStyleModel", - "description_width": "", - "_view_module": "@jupyter-widgets/base", - "_model_module_version": "1.5.0", - "_view_count": null, - "_view_module_version": "1.2.0", - "bar_color": null, - "_model_module": "@jupyter-widgets/controls" - } - }, - "daa2c6454a704b84bd7e2525a52dba0c": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_view_name": "LayoutView", - "grid_template_rows": null, - "right": null, - "justify_content": null, - "_view_module": "@jupyter-widgets/base", - "overflow": null, - "_model_module_version": "1.2.0", - "_view_count": null, - "flex_flow": null, - "width": null, - "min_width": null, - "border": null, - "align_items": null, - "bottom": null, - "_model_module": "@jupyter-widgets/base", - "top": null, - "grid_column": null, - "overflow_y": null, - "overflow_x": null, - "grid_auto_flow": null, - "grid_area": null, - "grid_template_columns": null, - "flex": null, - "_model_name": "LayoutModel", - "justify_items": null, - "grid_row": null, - "max_height": null, - "align_content": null, - "visibility": null, - "align_self": null, - "height": null, - "min_height": null, - "padding": null, - "grid_auto_rows": null, - "grid_gap": null, - "max_width": null, - "order": null, - "_view_module_version": "1.2.0", - "grid_template_areas": null, - "object_position": null, - 
"object_fit": null, - "grid_auto_columns": null, - "margin": null, - "display": null, - "left": null - } - }, - "f7f132d7b56b4bb9b950b09ad27ca115": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_view_name": "StyleView", - "_model_name": "DescriptionStyleModel", - "description_width": "", - "_view_module": "@jupyter-widgets/base", - "_model_module_version": "1.5.0", - "_view_count": null, - "_view_module_version": "1.2.0", - "_model_module": "@jupyter-widgets/controls" - } - }, - "fa983f4a063b4a83856b5d219e3ed04b": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_view_name": "LayoutView", - "grid_template_rows": null, - "right": null, - "justify_content": null, - "_view_module": "@jupyter-widgets/base", - "overflow": null, - "_model_module_version": "1.2.0", - "_view_count": null, - "flex_flow": null, - "width": null, - "min_width": null, - "border": null, - "align_items": null, - "bottom": null, - "_model_module": "@jupyter-widgets/base", - "top": null, - "grid_column": null, - "overflow_y": null, - "overflow_x": null, - "grid_auto_flow": null, - "grid_area": null, - "grid_template_columns": null, - "flex": null, - "_model_name": "LayoutModel", - "justify_items": null, - "grid_row": null, - "max_height": null, - "align_content": null, - "visibility": null, - "align_self": null, - "height": null, - "min_height": null, - "padding": null, - "grid_auto_rows": null, - "grid_gap": null, - "max_width": null, - "order": null, - "_view_module_version": "1.2.0", - "grid_template_areas": null, - "object_position": null, - "object_fit": null, - "grid_auto_columns": null, - "margin": null, - "display": null, - "left": null - } - }, - "af00f37ede9e45f0a59fdf54711cf985": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HBoxModel", - "model_module_version": "1.5.0", - "state": { - "_view_name": "HBoxView", - "_dom_classes": [], - "_model_name": "HBoxModel", - "_view_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_view_count": null, - "_view_module_version": "1.5.0", - "box_style": "", - "layout": "IPY_MODEL_d88766c2c3bc4e7e83530b7ae6647ffd", - "_model_module": "@jupyter-widgets/controls", - "children": [ - "IPY_MODEL_38c0d380e8ac4290b6d82979d0bb131a", - "IPY_MODEL_c93c7594ee084a4283144933bfcafefd", - "IPY_MODEL_0150a0c8ed674464874ba83453e0ddbd" - ] - } - }, - "d88766c2c3bc4e7e83530b7ae6647ffd": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_view_name": "LayoutView", - "grid_template_rows": null, - "right": null, - "justify_content": null, - "_view_module": "@jupyter-widgets/base", - "overflow": null, - "_model_module_version": "1.2.0", - "_view_count": null, - "flex_flow": null, - "width": null, - "min_width": null, - "border": null, - "align_items": null, - "bottom": null, - "_model_module": "@jupyter-widgets/base", - "top": null, - "grid_column": null, - "overflow_y": null, - "overflow_x": null, - "grid_auto_flow": null, - "grid_area": null, - "grid_template_columns": null, - "flex": null, - "_model_name": "LayoutModel", - "justify_items": null, - "grid_row": null, - "max_height": null, - "align_content": null, - "visibility": null, - "align_self": null, - "height": null, - "min_height": null, - "padding": null, - "grid_auto_rows": null, - "grid_gap": null, - "max_width": null, - "order": null, - 
"_view_module_version": "1.2.0", - "grid_template_areas": null, - "object_position": null, - "object_fit": null, - "grid_auto_columns": null, - "margin": null, - "display": null, - "left": null - } - }, - "38c0d380e8ac4290b6d82979d0bb131a": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_view_name": "HTMLView", - "style": "IPY_MODEL_0c8e9d22a63644cd88dd5aa7ba08a21f", - "_dom_classes": [], - "description": "", - "_model_name": "HTMLModel", - "placeholder": "​", - "_view_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "value": "Downloading: 100%", - "_view_count": null, - "_view_module_version": "1.5.0", - "description_tooltip": null, - "_model_module": "@jupyter-widgets/controls", - "layout": "IPY_MODEL_603c23100ac54d01ada1ebbba7bb5fc0" - } - }, - "c93c7594ee084a4283144933bfcafefd": { - "model_module": "@jupyter-widgets/controls", - "model_name": "FloatProgressModel", - "model_module_version": "1.5.0", - "state": { - "_view_name": "ProgressView", - "style": "IPY_MODEL_25f91f41a1de48498ebd248a3cce85a8", - "_dom_classes": [], - "description": "", - "_model_name": "FloatProgressModel", - "bar_style": "success", - "max": 7439277, - "_view_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "value": 7439277, - "_view_count": null, - "_view_module_version": "1.5.0", - "orientation": "horizontal", - "min": 0, - "description_tooltip": null, - "_model_module": "@jupyter-widgets/controls", - "layout": "IPY_MODEL_d1b6bdc47c544e84ae3ba3b584c7afa3" - } - }, - "0150a0c8ed674464874ba83453e0ddbd": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_view_name": "HTMLView", - "style": "IPY_MODEL_138cf20c691d4e9784adefea3ceecd1d", - "_dom_classes": [], - "description": "", - "_model_name": "HTMLModel", - "placeholder": "​", - "_view_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "value": " 7.44M/7.44M [00:00<00:00, 13.6MB/s]", - "_view_count": null, - "_view_module_version": "1.5.0", - "description_tooltip": null, - "_model_module": "@jupyter-widgets/controls", - "layout": "IPY_MODEL_5d02bf542c7c4c289722e03e56f5d56c" - } - }, - "0c8e9d22a63644cd88dd5aa7ba08a21f": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_view_name": "StyleView", - "_model_name": "DescriptionStyleModel", - "description_width": "", - "_view_module": "@jupyter-widgets/base", - "_model_module_version": "1.5.0", - "_view_count": null, - "_view_module_version": "1.2.0", - "_model_module": "@jupyter-widgets/controls" - } - }, - "603c23100ac54d01ada1ebbba7bb5fc0": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_view_name": "LayoutView", - "grid_template_rows": null, - "right": null, - "justify_content": null, - "_view_module": "@jupyter-widgets/base", - "overflow": null, - "_model_module_version": "1.2.0", - "_view_count": null, - "flex_flow": null, - "width": null, - "min_width": null, - "border": null, - "align_items": null, - "bottom": null, - "_model_module": "@jupyter-widgets/base", - "top": null, - "grid_column": null, - "overflow_y": null, - "overflow_x": null, - "grid_auto_flow": null, - "grid_area": null, - "grid_template_columns": null, - "flex": null, - "_model_name": "LayoutModel", - "justify_items": null, - "grid_row": null, 
- "max_height": null, - "align_content": null, - "visibility": null, - "align_self": null, - "height": null, - "min_height": null, - "padding": null, - "grid_auto_rows": null, - "grid_gap": null, - "max_width": null, - "order": null, - "_view_module_version": "1.2.0", - "grid_template_areas": null, - "object_position": null, - "object_fit": null, - "grid_auto_columns": null, - "margin": null, - "display": null, - "left": null - } - }, - "25f91f41a1de48498ebd248a3cce85a8": { - "model_module": "@jupyter-widgets/controls", - "model_name": "ProgressStyleModel", - "model_module_version": "1.5.0", - "state": { - "_view_name": "StyleView", - "_model_name": "ProgressStyleModel", - "description_width": "", - "_view_module": "@jupyter-widgets/base", - "_model_module_version": "1.5.0", - "_view_count": null, - "_view_module_version": "1.2.0", - "bar_color": null, - "_model_module": "@jupyter-widgets/controls" - } - }, - "d1b6bdc47c544e84ae3ba3b584c7afa3": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_view_name": "LayoutView", - "grid_template_rows": null, - "right": null, - "justify_content": null, - "_view_module": "@jupyter-widgets/base", - "overflow": null, - "_model_module_version": "1.2.0", - "_view_count": null, - "flex_flow": null, - "width": null, - "min_width": null, - "border": null, - "align_items": null, - "bottom": null, - "_model_module": "@jupyter-widgets/base", - "top": null, - "grid_column": null, - "overflow_y": null, - "overflow_x": null, - "grid_auto_flow": null, - "grid_area": null, - "grid_template_columns": null, - "flex": null, - "_model_name": "LayoutModel", - "justify_items": null, - "grid_row": null, - "max_height": null, - "align_content": null, - "visibility": null, - "align_self": null, - "height": null, - "min_height": null, - "padding": null, - "grid_auto_rows": null, - "grid_gap": null, - "max_width": null, - "order": null, - "_view_module_version": "1.2.0", - "grid_template_areas": null, - "object_position": null, - "object_fit": null, - "grid_auto_columns": null, - "margin": null, - "display": null, - "left": null - } - }, - "138cf20c691d4e9784adefea3ceecd1d": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_view_name": "StyleView", - "_model_name": "DescriptionStyleModel", - "description_width": "", - "_view_module": "@jupyter-widgets/base", - "_model_module_version": "1.5.0", - "_view_count": null, - "_view_module_version": "1.2.0", - "_model_module": "@jupyter-widgets/controls" - } - }, - "5d02bf542c7c4c289722e03e56f5d56c": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_view_name": "LayoutView", - "grid_template_rows": null, - "right": null, - "justify_content": null, - "_view_module": "@jupyter-widgets/base", - "overflow": null, - "_model_module_version": "1.2.0", - "_view_count": null, - "flex_flow": null, - "width": null, - "min_width": null, - "border": null, - "align_items": null, - "bottom": null, - "_model_module": "@jupyter-widgets/base", - "top": null, - "grid_column": null, - "overflow_y": null, - "overflow_x": null, - "grid_auto_flow": null, - "grid_area": null, - "grid_template_columns": null, - "flex": null, - "_model_name": "LayoutModel", - "justify_items": null, - "grid_row": null, - "max_height": null, - "align_content": null, - "visibility": null, - "align_self": null, - "height": null, - 
"min_height": null, - "padding": null, - "grid_auto_rows": null, - "grid_gap": null, - "max_width": null, - "order": null, - "_view_module_version": "1.2.0", - "grid_template_areas": null, - "object_position": null, - "object_fit": null, - "grid_auto_columns": null, - "margin": null, - "display": null, - "left": null - } - }, - "424af94826664dc1a8b38f252c4e047f": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HBoxModel", - "model_module_version": "1.5.0", - "state": { - "_view_name": "HBoxView", - "_dom_classes": [], - "_model_name": "HBoxModel", - "_view_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_view_count": null, - "_view_module_version": "1.5.0", - "box_style": "", - "layout": "IPY_MODEL_6e779ad14425452aa70f0efbf40f99b4", - "_model_module": "@jupyter-widgets/controls", - "children": [ - "IPY_MODEL_264ab6ca60db4c29ad45830ab9de40ef", - "IPY_MODEL_627d891d68474869a38e1801afe63b89", - "IPY_MODEL_48ac05c8ff36473e8896be8a47d876a4" - ] - } - }, - "6e779ad14425452aa70f0efbf40f99b4": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_view_name": "LayoutView", - "grid_template_rows": null, - "right": null, - "justify_content": null, - "_view_module": "@jupyter-widgets/base", - "overflow": null, - "_model_module_version": "1.2.0", - "_view_count": null, - "flex_flow": null, - "width": null, - "min_width": null, - "border": null, - "align_items": null, - "bottom": null, - "_model_module": "@jupyter-widgets/base", - "top": null, - "grid_column": null, - "overflow_y": null, - "overflow_x": null, - "grid_auto_flow": null, - "grid_area": null, - "grid_template_columns": null, - "flex": null, - "_model_name": "LayoutModel", - "justify_items": null, - "grid_row": null, - "max_height": null, - "align_content": null, - "visibility": null, - "align_self": null, - "height": null, - "min_height": null, - "padding": null, - "grid_auto_rows": null, - "grid_gap": null, - "max_width": null, - "order": null, - "_view_module_version": "1.2.0", - "grid_template_areas": null, - "object_position": null, - "object_fit": null, - "grid_auto_columns": null, - "margin": null, - "display": null, - "left": null - } - }, - "264ab6ca60db4c29ad45830ab9de40ef": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_view_name": "HTMLView", - "style": "IPY_MODEL_eef891ab6af04cd8ad39e50109c15bba", - "_dom_classes": [], - "description": "", - "_model_name": "HTMLModel", - "placeholder": "​", - "_view_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "value": "", - "_view_count": null, - "_view_module_version": "1.5.0", - "description_tooltip": null, - "_model_module": "@jupyter-widgets/controls", - "layout": "IPY_MODEL_743a844b407e41e7b9a84cc5feb1b7d0" - } - }, - "627d891d68474869a38e1801afe63b89": { - "model_module": "@jupyter-widgets/controls", - "model_name": "FloatProgressModel", - "model_module_version": "1.5.0", - "state": { - "_view_name": "ProgressView", - "style": "IPY_MODEL_d2c4e06c58174175baa80d3c316dcc09", - "_dom_classes": [], - "description": "", - "_model_name": "FloatProgressModel", - "bar_style": "info", - "max": 1, - "_view_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "value": 1, - "_view_count": null, - "_view_module_version": "1.5.0", - "orientation": "horizontal", - "min": 0, - "description_tooltip": null, - "_model_module": "@jupyter-widgets/controls", 
- "layout": "IPY_MODEL_5bd2eabf4cd343cf8a6056e8535d3150" - } - }, - "48ac05c8ff36473e8896be8a47d876a4": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_view_name": "HTMLView", - "style": "IPY_MODEL_2accb80f9e0440328199a278739c2d67", - "_dom_classes": [], - "description": "", - "_model_name": "HTMLModel", - "placeholder": "​", - "_view_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "value": " 66044/0 [00:02<00:00, 23358.81 examples/s]", - "_view_count": null, - "_view_module_version": "1.5.0", - "description_tooltip": null, - "_model_module": "@jupyter-widgets/controls", - "layout": "IPY_MODEL_cb1c32ecba014b84bf5832fff6732526" - } - }, - "eef891ab6af04cd8ad39e50109c15bba": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_view_name": "StyleView", - "_model_name": "DescriptionStyleModel", - "description_width": "", - "_view_module": "@jupyter-widgets/base", - "_model_module_version": "1.5.0", - "_view_count": null, - "_view_module_version": "1.2.0", - "_model_module": "@jupyter-widgets/controls" - } - }, - "743a844b407e41e7b9a84cc5feb1b7d0": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_view_name": "LayoutView", - "grid_template_rows": null, - "right": null, - "justify_content": null, - "_view_module": "@jupyter-widgets/base", - "overflow": null, - "_model_module_version": "1.2.0", - "_view_count": null, - "flex_flow": null, - "width": null, - "min_width": null, - "border": null, - "align_items": null, - "bottom": null, - "_model_module": "@jupyter-widgets/base", - "top": null, - "grid_column": null, - "overflow_y": null, - "overflow_x": null, - "grid_auto_flow": null, - "grid_area": null, - "grid_template_columns": null, - "flex": null, - "_model_name": "LayoutModel", - "justify_items": null, - "grid_row": null, - "max_height": null, - "align_content": null, - "visibility": null, - "align_self": null, - "height": null, - "min_height": null, - "padding": null, - "grid_auto_rows": null, - "grid_gap": null, - "max_width": null, - "order": null, - "_view_module_version": "1.2.0", - "grid_template_areas": null, - "object_position": null, - "object_fit": null, - "grid_auto_columns": null, - "margin": null, - "display": null, - "left": null - } - }, - "d2c4e06c58174175baa80d3c316dcc09": { - "model_module": "@jupyter-widgets/controls", - "model_name": "ProgressStyleModel", - "model_module_version": "1.5.0", - "state": { - "_view_name": "StyleView", - "_model_name": "ProgressStyleModel", - "description_width": "", - "_view_module": "@jupyter-widgets/base", - "_model_module_version": "1.5.0", - "_view_count": null, - "_view_module_version": "1.2.0", - "bar_color": null, - "_model_module": "@jupyter-widgets/controls" - } - }, - "5bd2eabf4cd343cf8a6056e8535d3150": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_view_name": "LayoutView", - "grid_template_rows": null, - "right": null, - "justify_content": null, - "_view_module": "@jupyter-widgets/base", - "overflow": null, - "_model_module_version": "1.2.0", - "_view_count": null, - "flex_flow": null, - "width": "20px", - "min_width": null, - "border": null, - "align_items": null, - "bottom": null, - "_model_module": "@jupyter-widgets/base", - "top": null, - "grid_column": null, - "overflow_y": 
null, - "overflow_x": null, - "grid_auto_flow": null, - "grid_area": null, - "grid_template_columns": null, - "flex": null, - "_model_name": "LayoutModel", - "justify_items": null, - "grid_row": null, - "max_height": null, - "align_content": null, - "visibility": null, - "align_self": null, - "height": null, - "min_height": null, - "padding": null, - "grid_auto_rows": null, - "grid_gap": null, - "max_width": null, - "order": null, - "_view_module_version": "1.2.0", - "grid_template_areas": null, - "object_position": null, - "object_fit": null, - "grid_auto_columns": null, - "margin": null, - "display": null, - "left": null - } - }, - "2accb80f9e0440328199a278739c2d67": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_view_name": "StyleView", - "_model_name": "DescriptionStyleModel", - "description_width": "", - "_view_module": "@jupyter-widgets/base", - "_model_module_version": "1.5.0", - "_view_count": null, - "_view_module_version": "1.2.0", - "_model_module": "@jupyter-widgets/controls" - } - }, - "cb1c32ecba014b84bf5832fff6732526": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_view_name": "LayoutView", - "grid_template_rows": null, - "right": null, - "justify_content": null, - "_view_module": "@jupyter-widgets/base", - "overflow": null, - "_model_module_version": "1.2.0", - "_view_count": null, - "flex_flow": null, - "width": null, - "min_width": null, - "border": null, - "align_items": null, - "bottom": null, - "_model_module": "@jupyter-widgets/base", - "top": null, - "grid_column": null, - "overflow_y": null, - "overflow_x": null, - "grid_auto_flow": null, - "grid_area": null, - "grid_template_columns": null, - "flex": null, - "_model_name": "LayoutModel", - "justify_items": null, - "grid_row": null, - "max_height": null, - "align_content": null, - "visibility": null, - "align_self": null, - "height": null, - "min_height": null, - "padding": null, - "grid_auto_rows": null, - "grid_gap": null, - "max_width": null, - "order": null, - "_view_module_version": "1.2.0", - "grid_template_areas": null, - "object_position": null, - "object_fit": null, - "grid_auto_columns": null, - "margin": null, - "display": null, - "left": null - } - }, - "f6ad5b1ec3f64ddbbf0d3bdb6d567658": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HBoxModel", - "model_module_version": "1.5.0", - "state": { - "_view_name": "HBoxView", - "_dom_classes": [], - "_model_name": "HBoxModel", - "_view_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_view_count": null, - "_view_module_version": "1.5.0", - "box_style": "", - "layout": "IPY_MODEL_af907b5540244cd4a38d9deeccbba57a", - "_model_module": "@jupyter-widgets/controls", - "children": [ - "IPY_MODEL_0a273d61ed62463daee40739cd52ae28", - "IPY_MODEL_f1c36b2e6651488b900ea7659a10ff4c", - "IPY_MODEL_519fb7f7926c4a31a561153deec61bc1" - ] - } - }, - "af907b5540244cd4a38d9deeccbba57a": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_view_name": "LayoutView", - "grid_template_rows": null, - "right": null, - "justify_content": null, - "_view_module": "@jupyter-widgets/base", - "overflow": null, - "_model_module_version": "1.2.0", - "_view_count": null, - "flex_flow": null, - "width": null, - "min_width": null, - "border": null, - "align_items": null, - "bottom": null, - 
"_model_module": "@jupyter-widgets/base", - "top": null, - "grid_column": null, - "overflow_y": null, - "overflow_x": null, - "grid_auto_flow": null, - "grid_area": null, - "grid_template_columns": null, - "flex": null, - "_model_name": "LayoutModel", - "justify_items": null, - "grid_row": null, - "max_height": null, - "align_content": null, - "visibility": null, - "align_self": null, - "height": null, - "min_height": null, - "padding": null, - "grid_auto_rows": null, - "grid_gap": null, - "max_width": null, - "order": null, - "_view_module_version": "1.2.0", - "grid_template_areas": null, - "object_position": null, - "object_fit": null, - "grid_auto_columns": null, - "margin": null, - "display": null, - "left": null - } - }, - "0a273d61ed62463daee40739cd52ae28": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_view_name": "HTMLView", - "style": "IPY_MODEL_5e009457ac3d4ef5a3b3fb6560b3c80f", - "_dom_classes": [], - "description": "", - "_model_name": "HTMLModel", - "placeholder": "​", - "_view_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "value": "", - "_view_count": null, - "_view_module_version": "1.5.0", - "description_tooltip": null, - "_model_module": "@jupyter-widgets/controls", - "layout": "IPY_MODEL_d8a9cfa29033467c8201007c05897627" - } - }, - "f1c36b2e6651488b900ea7659a10ff4c": { - "model_module": "@jupyter-widgets/controls", - "model_name": "FloatProgressModel", - "model_module_version": "1.5.0", - "state": { - "_view_name": "ProgressView", - "style": "IPY_MODEL_b8640741b3404eb2956a5b60a377db06", - "_dom_classes": [], - "description": "", - "_model_name": "FloatProgressModel", - "bar_style": "info", - "max": 1, - "_view_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "value": 1, - "_view_count": null, - "_view_module_version": "1.5.0", - "orientation": "horizontal", - "min": 0, - "description_tooltip": null, - "_model_module": "@jupyter-widgets/controls", - "layout": "IPY_MODEL_810727ad014a42aa8788a03578b8ee52" - } - }, - "519fb7f7926c4a31a561153deec61bc1": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_view_name": "HTMLView", - "style": "IPY_MODEL_2c2f360da4a64a0b8f21a28774ede852", - "_dom_classes": [], - "description": "", - "_model_name": "HTMLModel", - "placeholder": "​", - "_view_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "value": " 525/0 [00:00<00:00, 5205.72 examples/s]", - "_view_count": null, - "_view_module_version": "1.5.0", - "description_tooltip": null, - "_model_module": "@jupyter-widgets/controls", - "layout": "IPY_MODEL_36a2c00fefb64582b09eca3c02a33956" - } - }, - "5e009457ac3d4ef5a3b3fb6560b3c80f": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_view_name": "StyleView", - "_model_name": "DescriptionStyleModel", - "description_width": "", - "_view_module": "@jupyter-widgets/base", - "_model_module_version": "1.5.0", - "_view_count": null, - "_view_module_version": "1.2.0", - "_model_module": "@jupyter-widgets/controls" - } - }, - "d8a9cfa29033467c8201007c05897627": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_view_name": "LayoutView", - "grid_template_rows": null, - "right": null, - "justify_content": null, - "_view_module": 
"@jupyter-widgets/base", - "overflow": null, - "_model_module_version": "1.2.0", - "_view_count": null, - "flex_flow": null, - "width": null, - "min_width": null, - "border": null, - "align_items": null, - "bottom": null, - "_model_module": "@jupyter-widgets/base", - "top": null, - "grid_column": null, - "overflow_y": null, - "overflow_x": null, - "grid_auto_flow": null, - "grid_area": null, - "grid_template_columns": null, - "flex": null, - "_model_name": "LayoutModel", - "justify_items": null, - "grid_row": null, - "max_height": null, - "align_content": null, - "visibility": null, - "align_self": null, - "height": null, - "min_height": null, - "padding": null, - "grid_auto_rows": null, - "grid_gap": null, - "max_width": null, - "order": null, - "_view_module_version": "1.2.0", - "grid_template_areas": null, - "object_position": null, - "object_fit": null, - "grid_auto_columns": null, - "margin": null, - "display": null, - "left": null - } - }, - "b8640741b3404eb2956a5b60a377db06": { - "model_module": "@jupyter-widgets/controls", - "model_name": "ProgressStyleModel", - "model_module_version": "1.5.0", - "state": { - "_view_name": "StyleView", - "_model_name": "ProgressStyleModel", - "description_width": "", - "_view_module": "@jupyter-widgets/base", - "_model_module_version": "1.5.0", - "_view_count": null, - "_view_module_version": "1.2.0", - "bar_color": null, - "_model_module": "@jupyter-widgets/controls" - } - }, - "810727ad014a42aa8788a03578b8ee52": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_view_name": "LayoutView", - "grid_template_rows": null, - "right": null, - "justify_content": null, - "_view_module": "@jupyter-widgets/base", - "overflow": null, - "_model_module_version": "1.2.0", - "_view_count": null, - "flex_flow": null, - "width": "20px", - "min_width": null, - "border": null, - "align_items": null, - "bottom": null, - "_model_module": "@jupyter-widgets/base", - "top": null, - "grid_column": null, - "overflow_y": null, - "overflow_x": null, - "grid_auto_flow": null, - "grid_area": null, - "grid_template_columns": null, - "flex": null, - "_model_name": "LayoutModel", - "justify_items": null, - "grid_row": null, - "max_height": null, - "align_content": null, - "visibility": null, - "align_self": null, - "height": null, - "min_height": null, - "padding": null, - "grid_auto_rows": null, - "grid_gap": null, - "max_width": null, - "order": null, - "_view_module_version": "1.2.0", - "grid_template_areas": null, - "object_position": null, - "object_fit": null, - "grid_auto_columns": null, - "margin": null, - "display": null, - "left": null - } - }, - "2c2f360da4a64a0b8f21a28774ede852": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_view_name": "StyleView", - "_model_name": "DescriptionStyleModel", - "description_width": "", - "_view_module": "@jupyter-widgets/base", - "_model_module_version": "1.5.0", - "_view_count": null, - "_view_module_version": "1.2.0", - "_model_module": "@jupyter-widgets/controls" - } - }, - "36a2c00fefb64582b09eca3c02a33956": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_view_name": "LayoutView", - "grid_template_rows": null, - "right": null, - "justify_content": null, - "_view_module": "@jupyter-widgets/base", - "overflow": null, - "_model_module_version": "1.2.0", - "_view_count": null, - 
"flex_flow": null, - "width": null, - "min_width": null, - "border": null, - "align_items": null, - "bottom": null, - "_model_module": "@jupyter-widgets/base", - "top": null, - "grid_column": null, - "overflow_y": null, - "overflow_x": null, - "grid_auto_flow": null, - "grid_area": null, - "grid_template_columns": null, - "flex": null, - "_model_name": "LayoutModel", - "justify_items": null, - "grid_row": null, - "max_height": null, - "align_content": null, - "visibility": null, - "align_self": null, - "height": null, - "min_height": null, - "padding": null, - "grid_auto_rows": null, - "grid_gap": null, - "max_width": null, - "order": null, - "_view_module_version": "1.2.0", - "grid_template_areas": null, - "object_position": null, - "object_fit": null, - "grid_auto_columns": null, - "margin": null, - "display": null, - "left": null - } - }, - "2a0bd608a44944fda41042d07d54b076": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HBoxModel", - "model_module_version": "1.5.0", - "state": { - "_view_name": "HBoxView", - "_dom_classes": [], - "_model_name": "HBoxModel", - "_view_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_view_count": null, - "_view_module_version": "1.5.0", - "box_style": "", - "layout": "IPY_MODEL_6f5f6167aba247458fa8371416cd27d1", - "_model_module": "@jupyter-widgets/controls", - "children": [ - "IPY_MODEL_182aa21c1cab4699b21c89babf8b92ab", - "IPY_MODEL_37816bf4761c448c9bde942c8a7e4c7e", - "IPY_MODEL_c921586e8995495f8c8313da78382ff7" - ] - } - }, - "6f5f6167aba247458fa8371416cd27d1": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_view_name": "LayoutView", - "grid_template_rows": null, - "right": null, - "justify_content": null, - "_view_module": "@jupyter-widgets/base", - "overflow": null, - "_model_module_version": "1.2.0", - "_view_count": null, - "flex_flow": null, - "width": null, - "min_width": null, - "border": null, - "align_items": null, - "bottom": null, - "_model_module": "@jupyter-widgets/base", - "top": null, - "grid_column": null, - "overflow_y": null, - "overflow_x": null, - "grid_auto_flow": null, - "grid_area": null, - "grid_template_columns": null, - "flex": null, - "_model_name": "LayoutModel", - "justify_items": null, - "grid_row": null, - "max_height": null, - "align_content": null, - "visibility": null, - "align_self": null, - "height": null, - "min_height": null, - "padding": null, - "grid_auto_rows": null, - "grid_gap": null, - "max_width": null, - "order": null, - "_view_module_version": "1.2.0", - "grid_template_areas": null, - "object_position": null, - "object_fit": null, - "grid_auto_columns": null, - "margin": null, - "display": null, - "left": null - } - }, - "182aa21c1cab4699b21c89babf8b92ab": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_view_name": "HTMLView", - "style": "IPY_MODEL_fcca6c8857ea4edfbda08ed390747ad8", - "_dom_classes": [], - "description": "", - "_model_name": "HTMLModel", - "placeholder": "​", - "_view_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "value": "", - "_view_count": null, - "_view_module_version": "1.5.0", - "description_tooltip": null, - "_model_module": "@jupyter-widgets/controls", - "layout": "IPY_MODEL_83b50dba703b4eb6a3f488083a341dbc" - } - }, - "37816bf4761c448c9bde942c8a7e4c7e": { - "model_module": "@jupyter-widgets/controls", - "model_name": "FloatProgressModel", - 
"model_module_version": "1.5.0", - "state": { - "_view_name": "ProgressView", - "style": "IPY_MODEL_530f8a34716e4790b161f578ca592602", - "_dom_classes": [], - "description": "", - "_model_name": "FloatProgressModel", - "bar_style": "info", - "max": 1, - "_view_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "value": 1, - "_view_count": null, - "_view_module_version": "1.5.0", - "orientation": "horizontal", - "min": 0, - "description_tooltip": null, - "_model_module": "@jupyter-widgets/controls", - "layout": "IPY_MODEL_ecd1834382af47f48ccaed3d3e13b348" - } - }, - "c921586e8995495f8c8313da78382ff7": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_view_name": "HTMLView", - "style": "IPY_MODEL_4632d5700b7a4180b4ebef6ed36019c1", - "_dom_classes": [], - "description": "", - "_model_name": "HTMLModel", - "placeholder": "​", - "_view_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "value": " 362/0 [00:00<00:00, 3492.25 examples/s]", - "_view_count": null, - "_view_module_version": "1.5.0", - "description_tooltip": null, - "_model_module": "@jupyter-widgets/controls", - "layout": "IPY_MODEL_40f8c3c973034a7288156a727d84e1fc" - } - }, - "fcca6c8857ea4edfbda08ed390747ad8": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_view_name": "StyleView", - "_model_name": "DescriptionStyleModel", - "description_width": "", - "_view_module": "@jupyter-widgets/base", - "_model_module_version": "1.5.0", - "_view_count": null, - "_view_module_version": "1.2.0", - "_model_module": "@jupyter-widgets/controls" - } - }, - "83b50dba703b4eb6a3f488083a341dbc": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_view_name": "LayoutView", - "grid_template_rows": null, - "right": null, - "justify_content": null, - "_view_module": "@jupyter-widgets/base", - "overflow": null, - "_model_module_version": "1.2.0", - "_view_count": null, - "flex_flow": null, - "width": null, - "min_width": null, - "border": null, - "align_items": null, - "bottom": null, - "_model_module": "@jupyter-widgets/base", - "top": null, - "grid_column": null, - "overflow_y": null, - "overflow_x": null, - "grid_auto_flow": null, - "grid_area": null, - "grid_template_columns": null, - "flex": null, - "_model_name": "LayoutModel", - "justify_items": null, - "grid_row": null, - "max_height": null, - "align_content": null, - "visibility": null, - "align_self": null, - "height": null, - "min_height": null, - "padding": null, - "grid_auto_rows": null, - "grid_gap": null, - "max_width": null, - "order": null, - "_view_module_version": "1.2.0", - "grid_template_areas": null, - "object_position": null, - "object_fit": null, - "grid_auto_columns": null, - "margin": null, - "display": null, - "left": null - } - }, - "530f8a34716e4790b161f578ca592602": { - "model_module": "@jupyter-widgets/controls", - "model_name": "ProgressStyleModel", - "model_module_version": "1.5.0", - "state": { - "_view_name": "StyleView", - "_model_name": "ProgressStyleModel", - "description_width": "", - "_view_module": "@jupyter-widgets/base", - "_model_module_version": "1.5.0", - "_view_count": null, - "_view_module_version": "1.2.0", - "bar_color": null, - "_model_module": "@jupyter-widgets/controls" - } - }, - "ecd1834382af47f48ccaed3d3e13b348": { - "model_module": 
"@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_view_name": "LayoutView", - "grid_template_rows": null, - "right": null, - "justify_content": null, - "_view_module": "@jupyter-widgets/base", - "overflow": null, - "_model_module_version": "1.2.0", - "_view_count": null, - "flex_flow": null, - "width": "20px", - "min_width": null, - "border": null, - "align_items": null, - "bottom": null, - "_model_module": "@jupyter-widgets/base", - "top": null, - "grid_column": null, - "overflow_y": null, - "overflow_x": null, - "grid_auto_flow": null, - "grid_area": null, - "grid_template_columns": null, - "flex": null, - "_model_name": "LayoutModel", - "justify_items": null, - "grid_row": null, - "max_height": null, - "align_content": null, - "visibility": null, - "align_self": null, - "height": null, - "min_height": null, - "padding": null, - "grid_auto_rows": null, - "grid_gap": null, - "max_width": null, - "order": null, - "_view_module_version": "1.2.0", - "grid_template_areas": null, - "object_position": null, - "object_fit": null, - "grid_auto_columns": null, - "margin": null, - "display": null, - "left": null - } - }, - "4632d5700b7a4180b4ebef6ed36019c1": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_view_name": "StyleView", - "_model_name": "DescriptionStyleModel", - "description_width": "", - "_view_module": "@jupyter-widgets/base", - "_model_module_version": "1.5.0", - "_view_count": null, - "_view_module_version": "1.2.0", - "_model_module": "@jupyter-widgets/controls" - } - }, - "40f8c3c973034a7288156a727d84e1fc": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_view_name": "LayoutView", - "grid_template_rows": null, - "right": null, - "justify_content": null, - "_view_module": "@jupyter-widgets/base", - "overflow": null, - "_model_module_version": "1.2.0", - "_view_count": null, - "flex_flow": null, - "width": null, - "min_width": null, - "border": null, - "align_items": null, - "bottom": null, - "_model_module": "@jupyter-widgets/base", - "top": null, - "grid_column": null, - "overflow_y": null, - "overflow_x": null, - "grid_auto_flow": null, - "grid_area": null, - "grid_template_columns": null, - "flex": null, - "_model_name": "LayoutModel", - "justify_items": null, - "grid_row": null, - "max_height": null, - "align_content": null, - "visibility": null, - "align_self": null, - "height": null, - "min_height": null, - "padding": null, - "grid_auto_rows": null, - "grid_gap": null, - "max_width": null, - "order": null, - "_view_module_version": "1.2.0", - "grid_template_areas": null, - "object_position": null, - "object_fit": null, - "grid_auto_columns": null, - "margin": null, - "display": null, - "left": null - } - }, - "73a5417a077f4f7e82e7f11d7f4fefba": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HBoxModel", - "model_module_version": "1.5.0", - "state": { - "_view_name": "HBoxView", - "_dom_classes": [], - "_model_name": "HBoxModel", - "_view_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_view_count": null, - "_view_module_version": "1.5.0", - "box_style": "", - "layout": "IPY_MODEL_787e669ab19f4b3694b7560dd9012b68", - "_model_module": "@jupyter-widgets/controls", - "children": [ - "IPY_MODEL_3221c018b6604cada04f2710fd00e750", - "IPY_MODEL_068d9c59920d48b188b7e52c9117b6e6", - 
"IPY_MODEL_bf95592be6084cb782f11e2957120215" - ] - } - }, - "787e669ab19f4b3694b7560dd9012b68": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_view_name": "LayoutView", - "grid_template_rows": null, - "right": null, - "justify_content": null, - "_view_module": "@jupyter-widgets/base", - "overflow": null, - "_model_module_version": "1.2.0", - "_view_count": null, - "flex_flow": null, - "width": null, - "min_width": null, - "border": null, - "align_items": null, - "bottom": null, - "_model_module": "@jupyter-widgets/base", - "top": null, - "grid_column": null, - "overflow_y": null, - "overflow_x": null, - "grid_auto_flow": null, - "grid_area": null, - "grid_template_columns": null, - "flex": null, - "_model_name": "LayoutModel", - "justify_items": null, - "grid_row": null, - "max_height": null, - "align_content": null, - "visibility": null, - "align_self": null, - "height": null, - "min_height": null, - "padding": null, - "grid_auto_rows": null, - "grid_gap": null, - "max_width": null, - "order": null, - "_view_module_version": "1.2.0", - "grid_template_areas": null, - "object_position": null, - "object_fit": null, - "grid_auto_columns": null, - "margin": null, - "display": null, - "left": null - } - }, - "3221c018b6604cada04f2710fd00e750": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_view_name": "HTMLView", - "style": "IPY_MODEL_660a5285054b439496a555cdfef285b8", - "_dom_classes": [], - "description": "", - "_model_name": "HTMLModel", - "placeholder": "​", - "_view_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "value": "100%", - "_view_count": null, - "_view_module_version": "1.5.0", - "description_tooltip": null, - "_model_module": "@jupyter-widgets/controls", - "layout": "IPY_MODEL_1aa35c99719544d995629b2c797b6813" - } - }, - "068d9c59920d48b188b7e52c9117b6e6": { - "model_module": "@jupyter-widgets/controls", - "model_name": "FloatProgressModel", - "model_module_version": "1.5.0", - "state": { - "_view_name": "ProgressView", - "style": "IPY_MODEL_da770238262140a881a3ba9f7c9a6187", - "_dom_classes": [], - "description": "", - "_model_name": "FloatProgressModel", - "bar_style": "success", - "max": 3, - "_view_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "value": 3, - "_view_count": null, - "_view_module_version": "1.5.0", - "orientation": "horizontal", - "min": 0, - "description_tooltip": null, - "_model_module": "@jupyter-widgets/controls", - "layout": "IPY_MODEL_7a55e9a929c7489fbed6a3fd970c0621" - } - }, - "bf95592be6084cb782f11e2957120215": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_view_name": "HTMLView", - "style": "IPY_MODEL_d5e2ca12c98b4cb1b7e7f869ee1e549d", - "_dom_classes": [], - "description": "", - "_model_name": "HTMLModel", - "placeholder": "​", - "_view_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "value": " 3/3 [00:00<00:00, 62.18it/s]", - "_view_count": null, - "_view_module_version": "1.5.0", - "description_tooltip": null, - "_model_module": "@jupyter-widgets/controls", - "layout": "IPY_MODEL_57c72b3beb6c422690e0be7ba8c583c5" - } - }, - "660a5285054b439496a555cdfef285b8": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_view_name": "StyleView", 
- "_model_name": "DescriptionStyleModel", - "description_width": "", - "_view_module": "@jupyter-widgets/base", - "_model_module_version": "1.5.0", - "_view_count": null, - "_view_module_version": "1.2.0", - "_model_module": "@jupyter-widgets/controls" - } - }, - "1aa35c99719544d995629b2c797b6813": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_view_name": "LayoutView", - "grid_template_rows": null, - "right": null, - "justify_content": null, - "_view_module": "@jupyter-widgets/base", - "overflow": null, - "_model_module_version": "1.2.0", - "_view_count": null, - "flex_flow": null, - "width": null, - "min_width": null, - "border": null, - "align_items": null, - "bottom": null, - "_model_module": "@jupyter-widgets/base", - "top": null, - "grid_column": null, - "overflow_y": null, - "overflow_x": null, - "grid_auto_flow": null, - "grid_area": null, - "grid_template_columns": null, - "flex": null, - "_model_name": "LayoutModel", - "justify_items": null, - "grid_row": null, - "max_height": null, - "align_content": null, - "visibility": null, - "align_self": null, - "height": null, - "min_height": null, - "padding": null, - "grid_auto_rows": null, - "grid_gap": null, - "max_width": null, - "order": null, - "_view_module_version": "1.2.0", - "grid_template_areas": null, - "object_position": null, - "object_fit": null, - "grid_auto_columns": null, - "margin": null, - "display": null, - "left": null - } - }, - "da770238262140a881a3ba9f7c9a6187": { - "model_module": "@jupyter-widgets/controls", - "model_name": "ProgressStyleModel", - "model_module_version": "1.5.0", - "state": { - "_view_name": "StyleView", - "_model_name": "ProgressStyleModel", - "description_width": "", - "_view_module": "@jupyter-widgets/base", - "_model_module_version": "1.5.0", - "_view_count": null, - "_view_module_version": "1.2.0", - "bar_color": null, - "_model_module": "@jupyter-widgets/controls" - } - }, - "7a55e9a929c7489fbed6a3fd970c0621": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_view_name": "LayoutView", - "grid_template_rows": null, - "right": null, - "justify_content": null, - "_view_module": "@jupyter-widgets/base", - "overflow": null, - "_model_module_version": "1.2.0", - "_view_count": null, - "flex_flow": null, - "width": null, - "min_width": null, - "border": null, - "align_items": null, - "bottom": null, - "_model_module": "@jupyter-widgets/base", - "top": null, - "grid_column": null, - "overflow_y": null, - "overflow_x": null, - "grid_auto_flow": null, - "grid_area": null, - "grid_template_columns": null, - "flex": null, - "_model_name": "LayoutModel", - "justify_items": null, - "grid_row": null, - "max_height": null, - "align_content": null, - "visibility": null, - "align_self": null, - "height": null, - "min_height": null, - "padding": null, - "grid_auto_rows": null, - "grid_gap": null, - "max_width": null, - "order": null, - "_view_module_version": "1.2.0", - "grid_template_areas": null, - "object_position": null, - "object_fit": null, - "grid_auto_columns": null, - "margin": null, - "display": null, - "left": null - } - }, - "d5e2ca12c98b4cb1b7e7f869ee1e549d": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_view_name": "StyleView", - "_model_name": "DescriptionStyleModel", - "description_width": "", - "_view_module": "@jupyter-widgets/base", - 
"_model_module_version": "1.5.0", - "_view_count": null, - "_view_module_version": "1.2.0", - "_model_module": "@jupyter-widgets/controls" - } - }, - "57c72b3beb6c422690e0be7ba8c583c5": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_view_name": "LayoutView", - "grid_template_rows": null, - "right": null, - "justify_content": null, - "_view_module": "@jupyter-widgets/base", - "overflow": null, - "_model_module_version": "1.2.0", - "_view_count": null, - "flex_flow": null, - "width": null, - "min_width": null, - "border": null, - "align_items": null, - "bottom": null, - "_model_module": "@jupyter-widgets/base", - "top": null, - "grid_column": null, - "overflow_y": null, - "overflow_x": null, - "grid_auto_flow": null, - "grid_area": null, - "grid_template_columns": null, - "flex": null, - "_model_name": "LayoutModel", - "justify_items": null, - "grid_row": null, - "max_height": null, - "align_content": null, - "visibility": null, - "align_self": null, - "height": null, - "min_height": null, - "padding": null, - "grid_auto_rows": null, - "grid_gap": null, - "max_width": null, - "order": null, - "_view_module_version": "1.2.0", - "grid_template_areas": null, - "object_position": null, - "object_fit": null, - "grid_auto_columns": null, - "margin": null, - "display": null, - "left": null - } - } - } + "base_uri": "https://localhost:8080/" + }, + "pycharm": { + "name": "#%%\n" } + }, + "source": [ + "!pip3 install textattack[tensorflow]" + ], + "execution_count": 7, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Requirement already satisfied: textattack[tensorflow] in /usr/local/lib/python3.7/dist-packages (0.3.3)\n", + "Requirement already satisfied: language-tool-python in /usr/local/lib/python3.7/dist-packages (from textattack[tensorflow]) (2.6.1)\n", + "Requirement already satisfied: terminaltables in /usr/local/lib/python3.7/dist-packages (from textattack[tensorflow]) (3.1.0)\n", + "Requirement already satisfied: filelock in /usr/local/lib/python3.7/dist-packages (from textattack[tensorflow]) (3.0.12)\n", + "Requirement already satisfied: numpy>=1.19.2 in /usr/local/lib/python3.7/dist-packages (from textattack[tensorflow]) (1.19.5)\n", + "Requirement already satisfied: scipy>=1.4.1 in /usr/local/lib/python3.7/dist-packages (from textattack[tensorflow]) (1.4.1)\n", + "Requirement already satisfied: word2number in /usr/local/lib/python3.7/dist-packages (from textattack[tensorflow]) (1.1)\n", + "Requirement already satisfied: lru-dict in /usr/local/lib/python3.7/dist-packages (from textattack[tensorflow]) (1.1.7)\n", + "Requirement already satisfied: nltk in /usr/local/lib/python3.7/dist-packages (from textattack[tensorflow]) (3.2.5)\n", + "Requirement already satisfied: pandas>=1.0.1 in /usr/local/lib/python3.7/dist-packages (from textattack[tensorflow]) (1.1.5)\n", + "Requirement already satisfied: num2words in /usr/local/lib/python3.7/dist-packages (from textattack[tensorflow]) (0.5.10)\n", + "Requirement already satisfied: PySocks!=1.5.7,>=1.5.6 in /usr/local/lib/python3.7/dist-packages (from textattack[tensorflow]) (1.7.1)\n", + "Requirement already satisfied: transformers>=3.3.0 in /usr/local/lib/python3.7/dist-packages (from textattack[tensorflow]) (4.5.1)\n", + "Requirement already satisfied: lemminflect in /usr/local/lib/python3.7/dist-packages (from textattack[tensorflow]) (0.2.2)\n", + "Requirement already satisfied: more-itertools in 
/usr/local/lib/python3.7/dist-packages (from textattack[tensorflow]) (8.8.0)\n", + "Requirement already satisfied: editdistance in /usr/local/lib/python3.7/dist-packages (from textattack[tensorflow]) (0.5.3)\n", + "Requirement already satisfied: bert-score>=0.3.5 in /usr/local/lib/python3.7/dist-packages (from textattack[tensorflow]) (0.3.10)\n", + "Requirement already satisfied: torch!=1.8,>=1.7.0 in /usr/local/lib/python3.7/dist-packages (from textattack[tensorflow]) (1.9.0+cu111)\n", + "Requirement already satisfied: tqdm<4.50.0,>=4.27 in /usr/local/lib/python3.7/dist-packages (from textattack[tensorflow]) (4.49.0)\n", + "Requirement already satisfied: flair in /usr/local/lib/python3.7/dist-packages (from textattack[tensorflow]) (0.9)\n", + "Requirement already satisfied: datasets in /usr/local/lib/python3.7/dist-packages (from textattack[tensorflow]) (1.11.0)\n", + "Requirement already satisfied: tensorflow>=2 in /usr/local/lib/python3.7/dist-packages (from textattack[tensorflow]) (2.6.0)\n", + "Requirement already satisfied: tensorflow-hub in /usr/local/lib/python3.7/dist-packages (from textattack[tensorflow]) (0.12.0)\n", + "Requirement already satisfied: tensorboardX in /usr/local/lib/python3.7/dist-packages (from textattack[tensorflow]) (2.4)\n", + "Collecting tensorflow-text>=2\n", + " Downloading tensorflow_text-2.6.0-cp37-cp37m-manylinux1_x86_64.whl (4.4 MB)\n", + "\u001B[K |████████████████████████████████| 4.4 MB 5.4 MB/s \n", + "\u001B[?25hRequirement already satisfied: requests in /usr/local/lib/python3.7/dist-packages (from bert-score>=0.3.5->textattack[tensorflow]) (2.26.0)\n", + "Requirement already satisfied: matplotlib in /usr/local/lib/python3.7/dist-packages (from bert-score>=0.3.5->textattack[tensorflow]) (3.2.2)\n", + "Requirement already satisfied: packaging>=20.9 in /usr/local/lib/python3.7/dist-packages (from bert-score>=0.3.5->textattack[tensorflow]) (21.0)\n", + "Requirement already satisfied: pyparsing>=2.0.2 in /usr/local/lib/python3.7/dist-packages (from packaging>=20.9->bert-score>=0.3.5->textattack[tensorflow]) (2.4.7)\n", + "Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.7/dist-packages (from pandas>=1.0.1->textattack[tensorflow]) (2.8.2)\n", + "Requirement already satisfied: pytz>=2017.2 in /usr/local/lib/python3.7/dist-packages (from pandas>=1.0.1->textattack[tensorflow]) (2018.9)\n", + "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.7/dist-packages (from python-dateutil>=2.7.3->pandas>=1.0.1->textattack[tensorflow]) (1.15.0)\n", + "Requirement already satisfied: grpcio<2.0,>=1.37.0 in /usr/local/lib/python3.7/dist-packages (from tensorflow>=2->textattack[tensorflow]) (1.41.0)\n", + "Requirement already satisfied: tensorflow-estimator~=2.6 in /usr/local/lib/python3.7/dist-packages (from tensorflow>=2->textattack[tensorflow]) (2.6.0)\n", + "Requirement already satisfied: protobuf>=3.9.2 in /usr/local/lib/python3.7/dist-packages (from tensorflow>=2->textattack[tensorflow]) (3.17.3)\n", + "Requirement already satisfied: google-pasta~=0.2 in /usr/local/lib/python3.7/dist-packages (from tensorflow>=2->textattack[tensorflow]) (0.2.0)\n", + "Requirement already satisfied: gast==0.4.0 in /usr/local/lib/python3.7/dist-packages (from tensorflow>=2->textattack[tensorflow]) (0.4.0)\n", + "Requirement already satisfied: flatbuffers~=1.12.0 in /usr/local/lib/python3.7/dist-packages (from tensorflow>=2->textattack[tensorflow]) (1.12)\n", + "Requirement already satisfied: termcolor~=1.1.0 in 
/usr/local/lib/python3.7/dist-packages (from tensorflow>=2->textattack[tensorflow]) (1.1.0)\n", + "Requirement already satisfied: tensorboard~=2.6 in /usr/local/lib/python3.7/dist-packages (from tensorflow>=2->textattack[tensorflow]) (2.6.0)\n", + "Requirement already satisfied: keras~=2.6 in /usr/local/lib/python3.7/dist-packages (from tensorflow>=2->textattack[tensorflow]) (2.6.0)\n", + "Requirement already satisfied: clang~=5.0 in /usr/local/lib/python3.7/dist-packages (from tensorflow>=2->textattack[tensorflow]) (5.0)\n", + "Requirement already satisfied: absl-py~=0.10 in /usr/local/lib/python3.7/dist-packages (from tensorflow>=2->textattack[tensorflow]) (0.12.0)\n", + "Requirement already satisfied: wrapt~=1.12.1 in /usr/local/lib/python3.7/dist-packages (from tensorflow>=2->textattack[tensorflow]) (1.12.1)\n", + "Requirement already satisfied: astunparse~=1.6.3 in /usr/local/lib/python3.7/dist-packages (from tensorflow>=2->textattack[tensorflow]) (1.6.3)\n", + "Requirement already satisfied: wheel~=0.35 in /usr/local/lib/python3.7/dist-packages (from tensorflow>=2->textattack[tensorflow]) (0.37.0)\n", + "Requirement already satisfied: typing-extensions~=3.7.4 in /usr/local/lib/python3.7/dist-packages (from tensorflow>=2->textattack[tensorflow]) (3.7.4.3)\n", + "Requirement already satisfied: opt-einsum~=3.3.0 in /usr/local/lib/python3.7/dist-packages (from tensorflow>=2->textattack[tensorflow]) (3.3.0)\n", + "Requirement already satisfied: keras-preprocessing~=1.1.2 in /usr/local/lib/python3.7/dist-packages (from tensorflow>=2->textattack[tensorflow]) (1.1.2)\n", + "Requirement already satisfied: h5py~=3.1.0 in /usr/local/lib/python3.7/dist-packages (from tensorflow>=2->textattack[tensorflow]) (3.1.0)\n", + "Requirement already satisfied: cached-property in /usr/local/lib/python3.7/dist-packages (from h5py~=3.1.0->tensorflow>=2->textattack[tensorflow]) (1.5.2)\n", + "Requirement already satisfied: tensorboard-data-server<0.7.0,>=0.6.0 in /usr/local/lib/python3.7/dist-packages (from tensorboard~=2.6->tensorflow>=2->textattack[tensorflow]) (0.6.1)\n", + "Requirement already satisfied: google-auth-oauthlib<0.5,>=0.4.1 in /usr/local/lib/python3.7/dist-packages (from tensorboard~=2.6->tensorflow>=2->textattack[tensorflow]) (0.4.6)\n", + "Requirement already satisfied: markdown>=2.6.8 in /usr/local/lib/python3.7/dist-packages (from tensorboard~=2.6->tensorflow>=2->textattack[tensorflow]) (3.3.4)\n", + "Requirement already satisfied: setuptools>=41.0.0 in /usr/local/lib/python3.7/dist-packages (from tensorboard~=2.6->tensorflow>=2->textattack[tensorflow]) (57.4.0)\n", + "Requirement already satisfied: tensorboard-plugin-wit>=1.6.0 in /usr/local/lib/python3.7/dist-packages (from tensorboard~=2.6->tensorflow>=2->textattack[tensorflow]) (1.8.0)\n", + "Requirement already satisfied: werkzeug>=0.11.15 in /usr/local/lib/python3.7/dist-packages (from tensorboard~=2.6->tensorflow>=2->textattack[tensorflow]) (1.0.1)\n", + "Requirement already satisfied: google-auth<2,>=1.6.3 in /usr/local/lib/python3.7/dist-packages (from tensorboard~=2.6->tensorflow>=2->textattack[tensorflow]) (1.35.0)\n", + "Requirement already satisfied: pyasn1-modules>=0.2.1 in /usr/local/lib/python3.7/dist-packages (from google-auth<2,>=1.6.3->tensorboard~=2.6->tensorflow>=2->textattack[tensorflow]) (0.2.8)\n", + "Requirement already satisfied: rsa<5,>=3.1.4 in /usr/local/lib/python3.7/dist-packages (from google-auth<2,>=1.6.3->tensorboard~=2.6->tensorflow>=2->textattack[tensorflow]) (4.7.2)\n", + "Requirement already 
satisfied: cachetools<5.0,>=2.0.0 in /usr/local/lib/python3.7/dist-packages (from google-auth<2,>=1.6.3->tensorboard~=2.6->tensorflow>=2->textattack[tensorflow]) (4.2.4)\n", + "Requirement already satisfied: requests-oauthlib>=0.7.0 in /usr/local/lib/python3.7/dist-packages (from google-auth-oauthlib<0.5,>=0.4.1->tensorboard~=2.6->tensorflow>=2->textattack[tensorflow]) (1.3.0)\n", + "Requirement already satisfied: importlib-metadata in /usr/local/lib/python3.7/dist-packages (from markdown>=2.6.8->tensorboard~=2.6->tensorflow>=2->textattack[tensorflow]) (3.10.1)\n", + "Requirement already satisfied: pyasn1<0.5.0,>=0.4.6 in /usr/local/lib/python3.7/dist-packages (from pyasn1-modules>=0.2.1->google-auth<2,>=1.6.3->tensorboard~=2.6->tensorflow>=2->textattack[tensorflow]) (0.4.8)\n", + "Requirement already satisfied: charset-normalizer~=2.0.0 in /usr/local/lib/python3.7/dist-packages (from requests->bert-score>=0.3.5->textattack[tensorflow]) (2.0.6)\n", + "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /usr/local/lib/python3.7/dist-packages (from requests->bert-score>=0.3.5->textattack[tensorflow]) (1.25.11)\n", + "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.7/dist-packages (from requests->bert-score>=0.3.5->textattack[tensorflow]) (2.10)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.7/dist-packages (from requests->bert-score>=0.3.5->textattack[tensorflow]) (2021.5.30)\n", + "Requirement already satisfied: oauthlib>=3.0.0 in /usr/local/lib/python3.7/dist-packages (from requests-oauthlib>=0.7.0->google-auth-oauthlib<0.5,>=0.4.1->tensorboard~=2.6->tensorflow>=2->textattack[tensorflow]) (3.1.1)\n", + "Requirement already satisfied: sacremoses in /usr/local/lib/python3.7/dist-packages (from transformers>=3.3.0->textattack[tensorflow]) (0.0.46)\n", + "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.7/dist-packages (from transformers>=3.3.0->textattack[tensorflow]) (2019.12.20)\n", + "Requirement already satisfied: tokenizers<0.11,>=0.10.1 in /usr/local/lib/python3.7/dist-packages (from transformers>=3.3.0->textattack[tensorflow]) (0.10.3)\n", + "Requirement already satisfied: xxhash in /usr/local/lib/python3.7/dist-packages (from datasets->textattack[tensorflow]) (2.0.2)\n", + "Requirement already satisfied: fsspec>=2021.05.0 in /usr/local/lib/python3.7/dist-packages (from datasets->textattack[tensorflow]) (2021.10.1)\n", + "Requirement already satisfied: multiprocess in /usr/local/lib/python3.7/dist-packages (from datasets->textattack[tensorflow]) (0.70.12.2)\n", + "Requirement already satisfied: huggingface-hub<0.1.0 in /usr/local/lib/python3.7/dist-packages (from datasets->textattack[tensorflow]) (0.0.19)\n", + "Requirement already satisfied: dill in /usr/local/lib/python3.7/dist-packages (from datasets->textattack[tensorflow]) (0.3.4)\n", + "Requirement already satisfied: pyarrow!=4.0.0,>=1.0.0 in /usr/local/lib/python3.7/dist-packages (from datasets->textattack[tensorflow]) (3.0.0)\n", + "Requirement already satisfied: pyyaml in /usr/local/lib/python3.7/dist-packages (from huggingface-hub<0.1.0->datasets->textattack[tensorflow]) (3.13)\n", + "Requirement already satisfied: segtok>=1.5.7 in /usr/local/lib/python3.7/dist-packages (from flair->textattack[tensorflow]) (1.5.10)\n", + "Requirement already satisfied: hyperopt>=0.1.1 in /usr/local/lib/python3.7/dist-packages (from flair->textattack[tensorflow]) (0.1.2)\n", + "Requirement already satisfied: langdetect in 
/usr/local/lib/python3.7/dist-packages (from flair->textattack[tensorflow]) (1.0.9)\n", + "Requirement already satisfied: konoha<5.0.0,>=4.0.0 in /usr/local/lib/python3.7/dist-packages (from flair->textattack[tensorflow]) (4.6.5)\n", + "Requirement already satisfied: bpemb>=0.3.2 in /usr/local/lib/python3.7/dist-packages (from flair->textattack[tensorflow]) (0.3.3)\n", + "Requirement already satisfied: gdown==3.12.2 in /usr/local/lib/python3.7/dist-packages (from flair->textattack[tensorflow]) (3.12.2)\n", + "Requirement already satisfied: ftfy in /usr/local/lib/python3.7/dist-packages (from flair->textattack[tensorflow]) (6.0.3)\n", + "Requirement already satisfied: conllu>=4.0 in /usr/local/lib/python3.7/dist-packages (from flair->textattack[tensorflow]) (4.4.1)\n", + "Requirement already satisfied: scikit-learn>=0.21.3 in /usr/local/lib/python3.7/dist-packages (from flair->textattack[tensorflow]) (0.22.2.post1)\n", + "Requirement already satisfied: sqlitedict>=1.6.0 in /usr/local/lib/python3.7/dist-packages (from flair->textattack[tensorflow]) (1.7.0)\n", + "Requirement already satisfied: mpld3==0.3 in /usr/local/lib/python3.7/dist-packages (from flair->textattack[tensorflow]) (0.3)\n", + "Requirement already satisfied: lxml in /usr/local/lib/python3.7/dist-packages (from flair->textattack[tensorflow]) (4.2.6)\n", + "Requirement already satisfied: janome in /usr/local/lib/python3.7/dist-packages (from flair->textattack[tensorflow]) (0.4.1)\n", + "Requirement already satisfied: gensim<=3.8.3,>=3.4.0 in /usr/local/lib/python3.7/dist-packages (from flair->textattack[tensorflow]) (3.6.0)\n", + "Requirement already satisfied: sentencepiece==0.1.95 in /usr/local/lib/python3.7/dist-packages (from flair->textattack[tensorflow]) (0.1.95)\n", + "Requirement already satisfied: tabulate in /usr/local/lib/python3.7/dist-packages (from flair->textattack[tensorflow]) (0.8.9)\n", + "Requirement already satisfied: wikipedia-api in /usr/local/lib/python3.7/dist-packages (from flair->textattack[tensorflow]) (0.5.4)\n", + "Requirement already satisfied: deprecated>=1.2.4 in /usr/local/lib/python3.7/dist-packages (from flair->textattack[tensorflow]) (1.2.13)\n", + "Requirement already satisfied: smart-open>=1.2.1 in /usr/local/lib/python3.7/dist-packages (from gensim<=3.8.3,>=3.4.0->flair->textattack[tensorflow]) (5.2.1)\n", + "Requirement already satisfied: networkx in /usr/local/lib/python3.7/dist-packages (from hyperopt>=0.1.1->flair->textattack[tensorflow]) (2.6.3)\n", + "Requirement already satisfied: future in /usr/local/lib/python3.7/dist-packages (from hyperopt>=0.1.1->flair->textattack[tensorflow]) (0.16.0)\n", + "Requirement already satisfied: pymongo in /usr/local/lib/python3.7/dist-packages (from hyperopt>=0.1.1->flair->textattack[tensorflow]) (3.12.0)\n", + "Requirement already satisfied: overrides<4.0.0,>=3.0.0 in /usr/local/lib/python3.7/dist-packages (from konoha<5.0.0,>=4.0.0->flair->textattack[tensorflow]) (3.1.0)\n", + "Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.7/dist-packages (from importlib-metadata->markdown>=2.6.8->tensorboard~=2.6->tensorflow>=2->textattack[tensorflow]) (3.6.0)\n", + "Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.7/dist-packages (from matplotlib->bert-score>=0.3.5->textattack[tensorflow]) (1.3.2)\n", + "Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.7/dist-packages (from matplotlib->bert-score>=0.3.5->textattack[tensorflow]) (0.10.0)\n", + "Requirement already satisfied: joblib>=0.11 in 
/usr/local/lib/python3.7/dist-packages (from scikit-learn>=0.21.3->flair->textattack[tensorflow]) (1.0.1)\n", + "Requirement already satisfied: wcwidth in /usr/local/lib/python3.7/dist-packages (from ftfy->flair->textattack[tensorflow]) (0.2.5)\n", + "Requirement already satisfied: docopt>=0.6.2 in /usr/local/lib/python3.7/dist-packages (from num2words->textattack[tensorflow]) (0.6.2)\n", + "Requirement already satisfied: click in /usr/local/lib/python3.7/dist-packages (from sacremoses->transformers>=3.3.0->textattack[tensorflow]) (7.1.2)\n", + "Installing collected packages: tensorflow-text\n", + "Successfully installed tensorflow-text-2.6.0\n" + ] + } + ] }, - "cells": [ + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "_br6Xvsif9SA", + "outputId": "1025399f-9f63-4d9a-e854-e4f92ba24c45", + "pycharm": { + "name": "#%%\n" + } + }, + "source": [ + "from allennlp.predictors import Predictor\n", + "import allennlp_models.classification\n", + "\n", + "import textattack\n", + "\n", + "class AllenNLPModel(textattack.models.wrappers.ModelWrapper):\n", + " def __init__(self):\n", + " self.predictor = Predictor.from_path(\"https://storage.googleapis.com/allennlp-public-models/basic_stanford_sentiment_treebank-2020.06.09.tar.gz\")\n", + " self.model = self.predictor._model\n", + " self.tokenizer = self.predictor._dataset_reader._tokenizer\n", + "\n", + " def __call__(self, text_input_list):\n", + " outputs = []\n", + " for text_input in text_input_list:\n", + " outputs.append(self.predictor.predict(sentence=text_input))\n", + " # For each output, outputs['logits'] contains the logits where\n", + " # index 0 corresponds to the positive and index 1 corresponds \n", + " # to the negative score. We reverse the outputs (by reverse slicing,\n", + " # [::-1]) so that negative comes first and positive comes second.\n", + " return [output['logits'][::-1] for output in outputs]\n", + "\n", + "model_wrapper = AllenNLPModel()" + ], + "execution_count": 8, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "textattack: Updating TextAttack package dependencies.\n", + "textattack: Downloading NLTK required packages.\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "[nltk_data] Downloading package averaged_perceptron_tagger to\n", + "[nltk_data] /root/nltk_data...\n", + "[nltk_data] Unzipping taggers/averaged_perceptron_tagger.zip.\n", + "[nltk_data] Downloading package stopwords to /root/nltk_data...\n", + "[nltk_data] Unzipping corpora/stopwords.zip.\n", + "[nltk_data] Downloading package omw to /root/nltk_data...\n", + "[nltk_data] Unzipping corpora/omw.zip.\n", + "[nltk_data] Downloading package universal_tagset to /root/nltk_data...\n", + "[nltk_data] Unzipping taggers/universal_tagset.zip.\n", + "[nltk_data] Downloading package wordnet to /root/nltk_data...\n", + "[nltk_data] Unzipping corpora/wordnet.zip.\n", + "[nltk_data] Downloading package punkt to /root/nltk_data...\n", + "[nltk_data] Unzipping tokenizers/punkt.zip.\n" + ] + }, { - "cell_type": "markdown", - "metadata": { - "id": "JPVBc5ndpFIX" + "output_type": "stream", + "name": "stderr", + "text": [ + "textattack: Downloading https://textattack.s3.amazonaws.com/word_embeddings/paragramcf.\n", + "100%|██████████| 481M/481M [00:14<00:00, 33.6MB/s]\n", + "textattack: Unzipping file /root/.cache/textattack/tmp7xfefu5f.zip to /root/.cache/textattack/word_embeddings/paragramcf.\n", + "textattack: Successfully saved word_embeddings/paragramcf 
to cache.\n", + "Plugin allennlp_models could not be loaded: No module named 'nltk.translate.meteor_score'\n", + "downloading: 100%|##########| 37033341/37033341 [00:01<00:00, 27735821.99B/s]\n" + ] + } + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000, + "referenced_widgets": [ + "6b448a4eedc844ef840ca70aa997d02b", + "bd686416d53a4d88b3ae1e357c4f0e71", + "3b3da3896eca40caac9561b1979c90ba", + "47c06887d2aa477a820737eda5fb3ad4", + "aaed99b5432b47508d5090a8df7c24bc", + "f59ffe8c7da14da08c861235cf2d9ea7", + "869e01668ff342178f40f385a0bc3366", + "266f90eabfea46e1ae5ee4bc22f711ee", + "dff48b2efd70497ba4b28ca6bd1499d9", + "6439f6674b484b14b4e9bf21497efc56", + "45c77d8e79d14fffab433e78b86048ce", + "2cc82d8fd98749e7b160ac4dae04c9d8", + "60b5c7c86aa94936b06981c65b9db3e8", + "d26fbb35af5f45d7ad75977ea9c5ffad", + "9e0287b81c6f45a386858de9c8e8735e", + "b60a716e37964537b122ce1116e002d0", + "516f9277541e4c199a2fa125a75f8bdb", + "6f0f652f722f4827ba1eab9fb081d8d2", + "fd5086beccb6431fa907d90d7168f79f", + "daa2c6454a704b84bd7e2525a52dba0c", + "f7f132d7b56b4bb9b950b09ad27ca115", + "fa983f4a063b4a83856b5d219e3ed04b", + "af00f37ede9e45f0a59fdf54711cf985", + "d88766c2c3bc4e7e83530b7ae6647ffd", + "38c0d380e8ac4290b6d82979d0bb131a", + "c93c7594ee084a4283144933bfcafefd", + "0150a0c8ed674464874ba83453e0ddbd", + "0c8e9d22a63644cd88dd5aa7ba08a21f", + "603c23100ac54d01ada1ebbba7bb5fc0", + "25f91f41a1de48498ebd248a3cce85a8", + "d1b6bdc47c544e84ae3ba3b584c7afa3", + "138cf20c691d4e9784adefea3ceecd1d", + "5d02bf542c7c4c289722e03e56f5d56c", + "424af94826664dc1a8b38f252c4e047f", + "6e779ad14425452aa70f0efbf40f99b4", + "264ab6ca60db4c29ad45830ab9de40ef", + "627d891d68474869a38e1801afe63b89", + "48ac05c8ff36473e8896be8a47d876a4", + "eef891ab6af04cd8ad39e50109c15bba", + "743a844b407e41e7b9a84cc5feb1b7d0", + "d2c4e06c58174175baa80d3c316dcc09", + "5bd2eabf4cd343cf8a6056e8535d3150", + "2accb80f9e0440328199a278739c2d67", + "cb1c32ecba014b84bf5832fff6732526", + "f6ad5b1ec3f64ddbbf0d3bdb6d567658", + "af907b5540244cd4a38d9deeccbba57a", + "0a273d61ed62463daee40739cd52ae28", + "f1c36b2e6651488b900ea7659a10ff4c", + "519fb7f7926c4a31a561153deec61bc1", + "5e009457ac3d4ef5a3b3fb6560b3c80f", + "d8a9cfa29033467c8201007c05897627", + "b8640741b3404eb2956a5b60a377db06", + "810727ad014a42aa8788a03578b8ee52", + "2c2f360da4a64a0b8f21a28774ede852", + "36a2c00fefb64582b09eca3c02a33956", + "2a0bd608a44944fda41042d07d54b076", + "6f5f6167aba247458fa8371416cd27d1", + "182aa21c1cab4699b21c89babf8b92ab", + "37816bf4761c448c9bde942c8a7e4c7e", + "c921586e8995495f8c8313da78382ff7", + "fcca6c8857ea4edfbda08ed390747ad8", + "83b50dba703b4eb6a3f488083a341dbc", + "530f8a34716e4790b161f578ca592602", + "ecd1834382af47f48ccaed3d3e13b348", + "4632d5700b7a4180b4ebef6ed36019c1", + "40f8c3c973034a7288156a727d84e1fc", + "73a5417a077f4f7e82e7f11d7f4fefba", + "787e669ab19f4b3694b7560dd9012b68", + "3221c018b6604cada04f2710fd00e750", + "068d9c59920d48b188b7e52c9117b6e6", + "bf95592be6084cb782f11e2957120215", + "660a5285054b439496a555cdfef285b8", + "1aa35c99719544d995629b2c797b6813", + "da770238262140a881a3ba9f7c9a6187", + "7a55e9a929c7489fbed6a3fd970c0621", + "d5e2ca12c98b4cb1b7e7f869ee1e549d", + "57c72b3beb6c422690e0be7ba8c583c5" + ] + }, + "id": "MDRWI5Psb85g", + "outputId": "e66ec3d6-53d4-4e74-d6da-01a5f285ea98", + "pycharm": { + "name": "#%%\n" + } + }, + "source": [ + "from textattack.datasets import HuggingFaceDataset\n", + "from textattack.attack_recipes import TextBuggerLi2018\n", + "from 
textattack.attacker import Attacker\n", + "\n", + "\n", + "dataset = HuggingFaceDataset(\"glue\", \"sst2\", \"train\")\n", + "attack = TextBuggerLi2018.build(model_wrapper)\n", + "\n", + "attacker = Attacker(attack, dataset)\n", + "attacker.attack_dataset()" + ], + "execution_count": 9, + "outputs": [ + { + "output_type": "display_data", + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "6b448a4eedc844ef840ca70aa997d02b", + "version_minor": 0, + "version_major": 2 }, - "source": [ - "# TextAttack & AllenNLP \n", - "\n", - "This is an example of testing adversarial attacks from TextAttack on pretrained models provided by AllenNLP. \n", - "\n", - "In a few lines of code, we load a sentiment analysis model trained on the Stanford Sentiment Treebank and configure it with a TextAttack model wrapper. Then, we initialize the TextBugger attack and run the attack on a few samples from the SST-2 train set.\n", - "\n", - "For more information on AllenNLP pre-trained models: https://docs.allennlp.org/models/main/\n", - "\n", - "For more information about the TextBugger attack: https://arxiv.org/abs/1812.05271" + "text/plain": [ + "Downloading: 0%| | 0.00/7.78k [00:00 /dev/null" - ], - "execution_count": 4, - "outputs": [] + "text/plain": [ + "Downloading: 0%| | 0.00/7.44M [00:00=1.19.2 in /usr/local/lib/python3.7/dist-packages (from textattack[tensorflow]) (1.19.5)\n", - "Requirement already satisfied: scipy>=1.4.1 in /usr/local/lib/python3.7/dist-packages (from textattack[tensorflow]) (1.4.1)\n", - "Requirement already satisfied: word2number in /usr/local/lib/python3.7/dist-packages (from textattack[tensorflow]) (1.1)\n", - "Requirement already satisfied: lru-dict in /usr/local/lib/python3.7/dist-packages (from textattack[tensorflow]) (1.1.7)\n", - "Requirement already satisfied: nltk in /usr/local/lib/python3.7/dist-packages (from textattack[tensorflow]) (3.2.5)\n", - "Requirement already satisfied: pandas>=1.0.1 in /usr/local/lib/python3.7/dist-packages (from textattack[tensorflow]) (1.1.5)\n", - "Requirement already satisfied: num2words in /usr/local/lib/python3.7/dist-packages (from textattack[tensorflow]) (0.5.10)\n", - "Requirement already satisfied: PySocks!=1.5.7,>=1.5.6 in /usr/local/lib/python3.7/dist-packages (from textattack[tensorflow]) (1.7.1)\n", - "Requirement already satisfied: transformers>=3.3.0 in /usr/local/lib/python3.7/dist-packages (from textattack[tensorflow]) (4.5.1)\n", - "Requirement already satisfied: lemminflect in /usr/local/lib/python3.7/dist-packages (from textattack[tensorflow]) (0.2.2)\n", - "Requirement already satisfied: more-itertools in /usr/local/lib/python3.7/dist-packages (from textattack[tensorflow]) (8.8.0)\n", - "Requirement already satisfied: editdistance in /usr/local/lib/python3.7/dist-packages (from textattack[tensorflow]) (0.5.3)\n", - "Requirement already satisfied: bert-score>=0.3.5 in /usr/local/lib/python3.7/dist-packages (from textattack[tensorflow]) (0.3.10)\n", - "Requirement already satisfied: torch!=1.8,>=1.7.0 in /usr/local/lib/python3.7/dist-packages (from textattack[tensorflow]) (1.9.0+cu111)\n", - "Requirement already satisfied: tqdm<4.50.0,>=4.27 in /usr/local/lib/python3.7/dist-packages (from textattack[tensorflow]) (4.49.0)\n", - "Requirement already satisfied: flair in /usr/local/lib/python3.7/dist-packages (from textattack[tensorflow]) (0.9)\n", - "Requirement already satisfied: datasets in /usr/local/lib/python3.7/dist-packages (from textattack[tensorflow]) (1.11.0)\n", - "Requirement already satisfied: 
tensorflow>=2 in /usr/local/lib/python3.7/dist-packages (from textattack[tensorflow]) (2.6.0)\n", - "Requirement already satisfied: tensorflow-hub in /usr/local/lib/python3.7/dist-packages (from textattack[tensorflow]) (0.12.0)\n", - "Requirement already satisfied: tensorboardX in /usr/local/lib/python3.7/dist-packages (from textattack[tensorflow]) (2.4)\n", - "Collecting tensorflow-text>=2\n", - " Downloading tensorflow_text-2.6.0-cp37-cp37m-manylinux1_x86_64.whl (4.4 MB)\n", - "\u001b[K |████████████████████████████████| 4.4 MB 5.4 MB/s \n", - "\u001b[?25hRequirement already satisfied: requests in /usr/local/lib/python3.7/dist-packages (from bert-score>=0.3.5->textattack[tensorflow]) (2.26.0)\n", - "Requirement already satisfied: matplotlib in /usr/local/lib/python3.7/dist-packages (from bert-score>=0.3.5->textattack[tensorflow]) (3.2.2)\n", - "Requirement already satisfied: packaging>=20.9 in /usr/local/lib/python3.7/dist-packages (from bert-score>=0.3.5->textattack[tensorflow]) (21.0)\n", - "Requirement already satisfied: pyparsing>=2.0.2 in /usr/local/lib/python3.7/dist-packages (from packaging>=20.9->bert-score>=0.3.5->textattack[tensorflow]) (2.4.7)\n", - "Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.7/dist-packages (from pandas>=1.0.1->textattack[tensorflow]) (2.8.2)\n", - "Requirement already satisfied: pytz>=2017.2 in /usr/local/lib/python3.7/dist-packages (from pandas>=1.0.1->textattack[tensorflow]) (2018.9)\n", - "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.7/dist-packages (from python-dateutil>=2.7.3->pandas>=1.0.1->textattack[tensorflow]) (1.15.0)\n", - "Requirement already satisfied: grpcio<2.0,>=1.37.0 in /usr/local/lib/python3.7/dist-packages (from tensorflow>=2->textattack[tensorflow]) (1.41.0)\n", - "Requirement already satisfied: tensorflow-estimator~=2.6 in /usr/local/lib/python3.7/dist-packages (from tensorflow>=2->textattack[tensorflow]) (2.6.0)\n", - "Requirement already satisfied: protobuf>=3.9.2 in /usr/local/lib/python3.7/dist-packages (from tensorflow>=2->textattack[tensorflow]) (3.17.3)\n", - "Requirement already satisfied: google-pasta~=0.2 in /usr/local/lib/python3.7/dist-packages (from tensorflow>=2->textattack[tensorflow]) (0.2.0)\n", - "Requirement already satisfied: gast==0.4.0 in /usr/local/lib/python3.7/dist-packages (from tensorflow>=2->textattack[tensorflow]) (0.4.0)\n", - "Requirement already satisfied: flatbuffers~=1.12.0 in /usr/local/lib/python3.7/dist-packages (from tensorflow>=2->textattack[tensorflow]) (1.12)\n", - "Requirement already satisfied: termcolor~=1.1.0 in /usr/local/lib/python3.7/dist-packages (from tensorflow>=2->textattack[tensorflow]) (1.1.0)\n", - "Requirement already satisfied: tensorboard~=2.6 in /usr/local/lib/python3.7/dist-packages (from tensorflow>=2->textattack[tensorflow]) (2.6.0)\n", - "Requirement already satisfied: keras~=2.6 in /usr/local/lib/python3.7/dist-packages (from tensorflow>=2->textattack[tensorflow]) (2.6.0)\n", - "Requirement already satisfied: clang~=5.0 in /usr/local/lib/python3.7/dist-packages (from tensorflow>=2->textattack[tensorflow]) (5.0)\n", - "Requirement already satisfied: absl-py~=0.10 in /usr/local/lib/python3.7/dist-packages (from tensorflow>=2->textattack[tensorflow]) (0.12.0)\n", - "Requirement already satisfied: wrapt~=1.12.1 in /usr/local/lib/python3.7/dist-packages (from tensorflow>=2->textattack[tensorflow]) (1.12.1)\n", - "Requirement already satisfied: astunparse~=1.6.3 in /usr/local/lib/python3.7/dist-packages (from 
tensorflow>=2->textattack[tensorflow]) (1.6.3)\n", - "Requirement already satisfied: wheel~=0.35 in /usr/local/lib/python3.7/dist-packages (from tensorflow>=2->textattack[tensorflow]) (0.37.0)\n", - "Requirement already satisfied: typing-extensions~=3.7.4 in /usr/local/lib/python3.7/dist-packages (from tensorflow>=2->textattack[tensorflow]) (3.7.4.3)\n", - "Requirement already satisfied: opt-einsum~=3.3.0 in /usr/local/lib/python3.7/dist-packages (from tensorflow>=2->textattack[tensorflow]) (3.3.0)\n", - "Requirement already satisfied: keras-preprocessing~=1.1.2 in /usr/local/lib/python3.7/dist-packages (from tensorflow>=2->textattack[tensorflow]) (1.1.2)\n", - "Requirement already satisfied: h5py~=3.1.0 in /usr/local/lib/python3.7/dist-packages (from tensorflow>=2->textattack[tensorflow]) (3.1.0)\n", - "Requirement already satisfied: cached-property in /usr/local/lib/python3.7/dist-packages (from h5py~=3.1.0->tensorflow>=2->textattack[tensorflow]) (1.5.2)\n", - "Requirement already satisfied: tensorboard-data-server<0.7.0,>=0.6.0 in /usr/local/lib/python3.7/dist-packages (from tensorboard~=2.6->tensorflow>=2->textattack[tensorflow]) (0.6.1)\n", - "Requirement already satisfied: google-auth-oauthlib<0.5,>=0.4.1 in /usr/local/lib/python3.7/dist-packages (from tensorboard~=2.6->tensorflow>=2->textattack[tensorflow]) (0.4.6)\n", - "Requirement already satisfied: markdown>=2.6.8 in /usr/local/lib/python3.7/dist-packages (from tensorboard~=2.6->tensorflow>=2->textattack[tensorflow]) (3.3.4)\n", - "Requirement already satisfied: setuptools>=41.0.0 in /usr/local/lib/python3.7/dist-packages (from tensorboard~=2.6->tensorflow>=2->textattack[tensorflow]) (57.4.0)\n", - "Requirement already satisfied: tensorboard-plugin-wit>=1.6.0 in /usr/local/lib/python3.7/dist-packages (from tensorboard~=2.6->tensorflow>=2->textattack[tensorflow]) (1.8.0)\n", - "Requirement already satisfied: werkzeug>=0.11.15 in /usr/local/lib/python3.7/dist-packages (from tensorboard~=2.6->tensorflow>=2->textattack[tensorflow]) (1.0.1)\n", - "Requirement already satisfied: google-auth<2,>=1.6.3 in /usr/local/lib/python3.7/dist-packages (from tensorboard~=2.6->tensorflow>=2->textattack[tensorflow]) (1.35.0)\n", - "Requirement already satisfied: pyasn1-modules>=0.2.1 in /usr/local/lib/python3.7/dist-packages (from google-auth<2,>=1.6.3->tensorboard~=2.6->tensorflow>=2->textattack[tensorflow]) (0.2.8)\n", - "Requirement already satisfied: rsa<5,>=3.1.4 in /usr/local/lib/python3.7/dist-packages (from google-auth<2,>=1.6.3->tensorboard~=2.6->tensorflow>=2->textattack[tensorflow]) (4.7.2)\n", - "Requirement already satisfied: cachetools<5.0,>=2.0.0 in /usr/local/lib/python3.7/dist-packages (from google-auth<2,>=1.6.3->tensorboard~=2.6->tensorflow>=2->textattack[tensorflow]) (4.2.4)\n", - "Requirement already satisfied: requests-oauthlib>=0.7.0 in /usr/local/lib/python3.7/dist-packages (from google-auth-oauthlib<0.5,>=0.4.1->tensorboard~=2.6->tensorflow>=2->textattack[tensorflow]) (1.3.0)\n", - "Requirement already satisfied: importlib-metadata in /usr/local/lib/python3.7/dist-packages (from markdown>=2.6.8->tensorboard~=2.6->tensorflow>=2->textattack[tensorflow]) (3.10.1)\n", - "Requirement already satisfied: pyasn1<0.5.0,>=0.4.6 in /usr/local/lib/python3.7/dist-packages (from pyasn1-modules>=0.2.1->google-auth<2,>=1.6.3->tensorboard~=2.6->tensorflow>=2->textattack[tensorflow]) (0.4.8)\n", - "Requirement already satisfied: charset-normalizer~=2.0.0 in /usr/local/lib/python3.7/dist-packages (from 
requests->bert-score>=0.3.5->textattack[tensorflow]) (2.0.6)\n", - "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /usr/local/lib/python3.7/dist-packages (from requests->bert-score>=0.3.5->textattack[tensorflow]) (1.25.11)\n", - "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.7/dist-packages (from requests->bert-score>=0.3.5->textattack[tensorflow]) (2.10)\n", - "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.7/dist-packages (from requests->bert-score>=0.3.5->textattack[tensorflow]) (2021.5.30)\n", - "Requirement already satisfied: oauthlib>=3.0.0 in /usr/local/lib/python3.7/dist-packages (from requests-oauthlib>=0.7.0->google-auth-oauthlib<0.5,>=0.4.1->tensorboard~=2.6->tensorflow>=2->textattack[tensorflow]) (3.1.1)\n", - "Requirement already satisfied: sacremoses in /usr/local/lib/python3.7/dist-packages (from transformers>=3.3.0->textattack[tensorflow]) (0.0.46)\n", - "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.7/dist-packages (from transformers>=3.3.0->textattack[tensorflow]) (2019.12.20)\n", - "Requirement already satisfied: tokenizers<0.11,>=0.10.1 in /usr/local/lib/python3.7/dist-packages (from transformers>=3.3.0->textattack[tensorflow]) (0.10.3)\n", - "Requirement already satisfied: xxhash in /usr/local/lib/python3.7/dist-packages (from datasets->textattack[tensorflow]) (2.0.2)\n", - "Requirement already satisfied: fsspec>=2021.05.0 in /usr/local/lib/python3.7/dist-packages (from datasets->textattack[tensorflow]) (2021.10.1)\n", - "Requirement already satisfied: multiprocess in /usr/local/lib/python3.7/dist-packages (from datasets->textattack[tensorflow]) (0.70.12.2)\n", - "Requirement already satisfied: huggingface-hub<0.1.0 in /usr/local/lib/python3.7/dist-packages (from datasets->textattack[tensorflow]) (0.0.19)\n", - "Requirement already satisfied: dill in /usr/local/lib/python3.7/dist-packages (from datasets->textattack[tensorflow]) (0.3.4)\n", - "Requirement already satisfied: pyarrow!=4.0.0,>=1.0.0 in /usr/local/lib/python3.7/dist-packages (from datasets->textattack[tensorflow]) (3.0.0)\n", - "Requirement already satisfied: pyyaml in /usr/local/lib/python3.7/dist-packages (from huggingface-hub<0.1.0->datasets->textattack[tensorflow]) (3.13)\n", - "Requirement already satisfied: segtok>=1.5.7 in /usr/local/lib/python3.7/dist-packages (from flair->textattack[tensorflow]) (1.5.10)\n", - "Requirement already satisfied: hyperopt>=0.1.1 in /usr/local/lib/python3.7/dist-packages (from flair->textattack[tensorflow]) (0.1.2)\n", - "Requirement already satisfied: langdetect in /usr/local/lib/python3.7/dist-packages (from flair->textattack[tensorflow]) (1.0.9)\n", - "Requirement already satisfied: konoha<5.0.0,>=4.0.0 in /usr/local/lib/python3.7/dist-packages (from flair->textattack[tensorflow]) (4.6.5)\n", - "Requirement already satisfied: bpemb>=0.3.2 in /usr/local/lib/python3.7/dist-packages (from flair->textattack[tensorflow]) (0.3.3)\n", - "Requirement already satisfied: gdown==3.12.2 in /usr/local/lib/python3.7/dist-packages (from flair->textattack[tensorflow]) (3.12.2)\n", - "Requirement already satisfied: ftfy in /usr/local/lib/python3.7/dist-packages (from flair->textattack[tensorflow]) (6.0.3)\n", - "Requirement already satisfied: conllu>=4.0 in /usr/local/lib/python3.7/dist-packages (from flair->textattack[tensorflow]) (4.4.1)\n", - "Requirement already satisfied: scikit-learn>=0.21.3 in /usr/local/lib/python3.7/dist-packages (from flair->textattack[tensorflow]) (0.22.2.post1)\n", - 
"Requirement already satisfied: sqlitedict>=1.6.0 in /usr/local/lib/python3.7/dist-packages (from flair->textattack[tensorflow]) (1.7.0)\n", - "Requirement already satisfied: mpld3==0.3 in /usr/local/lib/python3.7/dist-packages (from flair->textattack[tensorflow]) (0.3)\n", - "Requirement already satisfied: lxml in /usr/local/lib/python3.7/dist-packages (from flair->textattack[tensorflow]) (4.2.6)\n", - "Requirement already satisfied: janome in /usr/local/lib/python3.7/dist-packages (from flair->textattack[tensorflow]) (0.4.1)\n", - "Requirement already satisfied: gensim<=3.8.3,>=3.4.0 in /usr/local/lib/python3.7/dist-packages (from flair->textattack[tensorflow]) (3.6.0)\n", - "Requirement already satisfied: sentencepiece==0.1.95 in /usr/local/lib/python3.7/dist-packages (from flair->textattack[tensorflow]) (0.1.95)\n", - "Requirement already satisfied: tabulate in /usr/local/lib/python3.7/dist-packages (from flair->textattack[tensorflow]) (0.8.9)\n", - "Requirement already satisfied: wikipedia-api in /usr/local/lib/python3.7/dist-packages (from flair->textattack[tensorflow]) (0.5.4)\n", - "Requirement already satisfied: deprecated>=1.2.4 in /usr/local/lib/python3.7/dist-packages (from flair->textattack[tensorflow]) (1.2.13)\n", - "Requirement already satisfied: smart-open>=1.2.1 in /usr/local/lib/python3.7/dist-packages (from gensim<=3.8.3,>=3.4.0->flair->textattack[tensorflow]) (5.2.1)\n", - "Requirement already satisfied: networkx in /usr/local/lib/python3.7/dist-packages (from hyperopt>=0.1.1->flair->textattack[tensorflow]) (2.6.3)\n", - "Requirement already satisfied: future in /usr/local/lib/python3.7/dist-packages (from hyperopt>=0.1.1->flair->textattack[tensorflow]) (0.16.0)\n", - "Requirement already satisfied: pymongo in /usr/local/lib/python3.7/dist-packages (from hyperopt>=0.1.1->flair->textattack[tensorflow]) (3.12.0)\n", - "Requirement already satisfied: overrides<4.0.0,>=3.0.0 in /usr/local/lib/python3.7/dist-packages (from konoha<5.0.0,>=4.0.0->flair->textattack[tensorflow]) (3.1.0)\n", - "Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.7/dist-packages (from importlib-metadata->markdown>=2.6.8->tensorboard~=2.6->tensorflow>=2->textattack[tensorflow]) (3.6.0)\n", - "Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.7/dist-packages (from matplotlib->bert-score>=0.3.5->textattack[tensorflow]) (1.3.2)\n", - "Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.7/dist-packages (from matplotlib->bert-score>=0.3.5->textattack[tensorflow]) (0.10.0)\n", - "Requirement already satisfied: joblib>=0.11 in /usr/local/lib/python3.7/dist-packages (from scikit-learn>=0.21.3->flair->textattack[tensorflow]) (1.0.1)\n", - "Requirement already satisfied: wcwidth in /usr/local/lib/python3.7/dist-packages (from ftfy->flair->textattack[tensorflow]) (0.2.5)\n", - "Requirement already satisfied: docopt>=0.6.2 in /usr/local/lib/python3.7/dist-packages (from num2words->textattack[tensorflow]) (0.6.2)\n", - "Requirement already satisfied: click in /usr/local/lib/python3.7/dist-packages (from sacremoses->transformers>=3.3.0->textattack[tensorflow]) (7.1.2)\n", - "Installing collected packages: tensorflow-text\n", - "Successfully installed tensorflow-text-2.6.0\n" - ] - } + "text/plain": [ + "0 examples [00:00, ? 
examples/s]" ] + }, + "metadata": {} }, { - "cell_type": "code", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "_br6Xvsif9SA", - "outputId": "1025399f-9f63-4d9a-e854-e4f92ba24c45" + "output_type": "display_data", + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "2a0bd608a44944fda41042d07d54b076", + "version_minor": 0, + "version_major": 2 }, - "source": [ - "from allennlp.predictors import Predictor\n", - "import allennlp_models.classification\n", - "\n", - "import textattack\n", - "\n", - "class AllenNLPModel(textattack.models.wrappers.ModelWrapper):\n", - " def __init__(self):\n", - " self.predictor = Predictor.from_path(\"https://storage.googleapis.com/allennlp-public-models/basic_stanford_sentiment_treebank-2020.06.09.tar.gz\")\n", - " self.model = self.predictor._model\n", - " self.tokenizer = self.predictor._dataset_reader._tokenizer\n", - "\n", - " def __call__(self, text_input_list):\n", - " outputs = []\n", - " for text_input in text_input_list:\n", - " outputs.append(self.predictor.predict(sentence=text_input))\n", - " # For each output, outputs['logits'] contains the logits where\n", - " # index 0 corresponds to the positive and index 1 corresponds \n", - " # to the negative score. We reverse the outputs (by reverse slicing,\n", - " # [::-1]) so that negative comes first and positive comes second.\n", - " return [output['logits'][::-1] for output in outputs]\n", - "\n", - "model_wrapper = AllenNLPModel()" - ], - "execution_count": 8, - "outputs": [ - { - "output_type": "stream", - "name": "stderr", - "text": [ - "textattack: Updating TextAttack package dependencies.\n", - "textattack: Downloading NLTK required packages.\n" - ] - }, - { - "output_type": "stream", - "name": "stdout", - "text": [ - "[nltk_data] Downloading package averaged_perceptron_tagger to\n", - "[nltk_data] /root/nltk_data...\n", - "[nltk_data] Unzipping taggers/averaged_perceptron_tagger.zip.\n", - "[nltk_data] Downloading package stopwords to /root/nltk_data...\n", - "[nltk_data] Unzipping corpora/stopwords.zip.\n", - "[nltk_data] Downloading package omw to /root/nltk_data...\n", - "[nltk_data] Unzipping corpora/omw.zip.\n", - "[nltk_data] Downloading package universal_tagset to /root/nltk_data...\n", - "[nltk_data] Unzipping taggers/universal_tagset.zip.\n", - "[nltk_data] Downloading package wordnet to /root/nltk_data...\n", - "[nltk_data] Unzipping corpora/wordnet.zip.\n", - "[nltk_data] Downloading package punkt to /root/nltk_data...\n", - "[nltk_data] Unzipping tokenizers/punkt.zip.\n" - ] - }, - { - "output_type": "stream", - "name": "stderr", - "text": [ - "textattack: Downloading https://textattack.s3.amazonaws.com/word_embeddings/paragramcf.\n", - "100%|██████████| 481M/481M [00:14<00:00, 33.6MB/s]\n", - "textattack: Unzipping file /root/.cache/textattack/tmp7xfefu5f.zip to /root/.cache/textattack/word_embeddings/paragramcf.\n", - "textattack: Successfully saved word_embeddings/paragramcf to cache.\n", - "Plugin allennlp_models could not be loaded: No module named 'nltk.translate.meteor_score'\n", - "downloading: 100%|##########| 37033341/37033341 [00:01<00:00, 27735821.99B/s]\n" - ] - } + "text/plain": [ + "0 examples [00:00, ? 
examples/s]" ] + }, + "metadata": {} }, { - "cell_type": "code", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 1000, - "referenced_widgets": [ - "6b448a4eedc844ef840ca70aa997d02b", - "bd686416d53a4d88b3ae1e357c4f0e71", - "3b3da3896eca40caac9561b1979c90ba", - "47c06887d2aa477a820737eda5fb3ad4", - "aaed99b5432b47508d5090a8df7c24bc", - "f59ffe8c7da14da08c861235cf2d9ea7", - "869e01668ff342178f40f385a0bc3366", - "266f90eabfea46e1ae5ee4bc22f711ee", - "dff48b2efd70497ba4b28ca6bd1499d9", - "6439f6674b484b14b4e9bf21497efc56", - "45c77d8e79d14fffab433e78b86048ce", - "2cc82d8fd98749e7b160ac4dae04c9d8", - "60b5c7c86aa94936b06981c65b9db3e8", - "d26fbb35af5f45d7ad75977ea9c5ffad", - "9e0287b81c6f45a386858de9c8e8735e", - "b60a716e37964537b122ce1116e002d0", - "516f9277541e4c199a2fa125a75f8bdb", - "6f0f652f722f4827ba1eab9fb081d8d2", - "fd5086beccb6431fa907d90d7168f79f", - "daa2c6454a704b84bd7e2525a52dba0c", - "f7f132d7b56b4bb9b950b09ad27ca115", - "fa983f4a063b4a83856b5d219e3ed04b", - "af00f37ede9e45f0a59fdf54711cf985", - "d88766c2c3bc4e7e83530b7ae6647ffd", - "38c0d380e8ac4290b6d82979d0bb131a", - "c93c7594ee084a4283144933bfcafefd", - "0150a0c8ed674464874ba83453e0ddbd", - "0c8e9d22a63644cd88dd5aa7ba08a21f", - "603c23100ac54d01ada1ebbba7bb5fc0", - "25f91f41a1de48498ebd248a3cce85a8", - "d1b6bdc47c544e84ae3ba3b584c7afa3", - "138cf20c691d4e9784adefea3ceecd1d", - "5d02bf542c7c4c289722e03e56f5d56c", - "424af94826664dc1a8b38f252c4e047f", - "6e779ad14425452aa70f0efbf40f99b4", - "264ab6ca60db4c29ad45830ab9de40ef", - "627d891d68474869a38e1801afe63b89", - "48ac05c8ff36473e8896be8a47d876a4", - "eef891ab6af04cd8ad39e50109c15bba", - "743a844b407e41e7b9a84cc5feb1b7d0", - "d2c4e06c58174175baa80d3c316dcc09", - "5bd2eabf4cd343cf8a6056e8535d3150", - "2accb80f9e0440328199a278739c2d67", - "cb1c32ecba014b84bf5832fff6732526", - "f6ad5b1ec3f64ddbbf0d3bdb6d567658", - "af907b5540244cd4a38d9deeccbba57a", - "0a273d61ed62463daee40739cd52ae28", - "f1c36b2e6651488b900ea7659a10ff4c", - "519fb7f7926c4a31a561153deec61bc1", - "5e009457ac3d4ef5a3b3fb6560b3c80f", - "d8a9cfa29033467c8201007c05897627", - "b8640741b3404eb2956a5b60a377db06", - "810727ad014a42aa8788a03578b8ee52", - "2c2f360da4a64a0b8f21a28774ede852", - "36a2c00fefb64582b09eca3c02a33956", - "2a0bd608a44944fda41042d07d54b076", - "6f5f6167aba247458fa8371416cd27d1", - "182aa21c1cab4699b21c89babf8b92ab", - "37816bf4761c448c9bde942c8a7e4c7e", - "c921586e8995495f8c8313da78382ff7", - "fcca6c8857ea4edfbda08ed390747ad8", - "83b50dba703b4eb6a3f488083a341dbc", - "530f8a34716e4790b161f578ca592602", - "ecd1834382af47f48ccaed3d3e13b348", - "4632d5700b7a4180b4ebef6ed36019c1", - "40f8c3c973034a7288156a727d84e1fc", - "73a5417a077f4f7e82e7f11d7f4fefba", - "787e669ab19f4b3694b7560dd9012b68", - "3221c018b6604cada04f2710fd00e750", - "068d9c59920d48b188b7e52c9117b6e6", - "bf95592be6084cb782f11e2957120215", - "660a5285054b439496a555cdfef285b8", - "1aa35c99719544d995629b2c797b6813", - "da770238262140a881a3ba9f7c9a6187", - "7a55e9a929c7489fbed6a3fd970c0621", - "d5e2ca12c98b4cb1b7e7f869ee1e549d", - "57c72b3beb6c422690e0be7ba8c583c5" - ] - }, - "id": "MDRWI5Psb85g", - "outputId": "e66ec3d6-53d4-4e74-d6da-01a5f285ea98" + "output_type": "stream", + "name": "stdout", + "text": [ + "Dataset glue downloaded and prepared to /root/.cache/huggingface/datasets/glue/sst2/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad. 
Subsequent calls will reuse this data.\n" + ] + }, + { + "output_type": "display_data", + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "73a5417a077f4f7e82e7f11d7f4fefba", + "version_minor": 0, + "version_major": 2 }, - "source": [ - "from textattack.datasets import HuggingFaceDataset\n", - "from textattack.attack_recipes import TextBuggerLi2018\n", - "from textattack.attacker import Attacker\n", - "\n", - "\n", - "dataset = HuggingFaceDataset(\"glue\", \"sst2\", \"train\")\n", - "attack = TextBuggerLi2018.build(model_wrapper)\n", - "\n", - "attacker = Attacker(attack, dataset)\n", - "attacker.attack_dataset()" - ], - "execution_count": 9, - "outputs": [ - { - "output_type": "display_data", - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "6b448a4eedc844ef840ca70aa997d02b", - "version_minor": 0, - "version_major": 2 - }, - "text/plain": [ - "Downloading: 0%| | 0.00/7.78k [00:00 compatible with goal function .\n" - ] - }, - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Attack(\n", - " (search_method): GreedyWordSwapWIR(\n", - " (wir_method): delete\n", - " )\n", - " (goal_function): UntargetedClassification\n", - " (transformation): CompositeTransformation(\n", - " (0): WordSwapRandomCharacterInsertion(\n", - " (random_one): True\n", - " )\n", - " (1): WordSwapRandomCharacterDeletion(\n", - " (random_one): True\n", - " )\n", - " (2): WordSwapNeighboringCharacterSwap(\n", - " (random_one): True\n", - " )\n", - " (3): WordSwapHomoglyphSwap\n", - " (4): WordSwapEmbedding(\n", - " (max_candidates): 5\n", - " (embedding): WordEmbedding\n", - " )\n", - " )\n", - " (constraints): \n", - " (0): UniversalSentenceEncoder(\n", - " (metric): angular\n", - " (threshold): 0.8\n", - " (window_size): inf\n", - " (skip_text_shorter_than_window): False\n", - " (compare_against_original): True\n", - " )\n", - " (1): RepeatModification\n", - " (2): StopwordModification\n", - " (is_black_box): True\n", - ") \n", - "\n" - ] - }, - { - "output_type": "stream", - "name": "stderr", - "text": [ - "\r 0%| | 0/10 [00:00 [[Positive (93%)]]\n", - "\n", - "[[hide]] new secretions from the parental units \n", - "\n", - "[[concealing]] new secretions from the parental units \n", - "\n", - "\n", - "--------------------------------------------- Result 2 ---------------------------------------------\n", - "[[Negative (96%)]] --> [[[FAILED]]]\n", - "\n", - "contains no wit , only labored gags \n", - "\n", - "\n" - ] - }, - { - "output_type": "stream", - "name": "stderr", - "text": [ - "[Succeeded / Failed / Skipped / Total] 1 / 2 / 1 / 4: 40%|████ | 4/10 [01:27<02:11, 21.91s/it]" - ] - }, - { - "output_type": "stream", - "name": "stdout", - "text": [ - "--------------------------------------------- Result 3 ---------------------------------------------\n", - "[[Positive (100%)]] --> [[[FAILED]]]\n", - "\n", - "that loves its characters and communicates something rather beautiful about human nature \n", - "\n", - "\n", - "--------------------------------------------- Result 4 ---------------------------------------------\n", - "[[Positive (82%)]] --> [[[SKIPPED]]]\n", - "\n", - "remains utterly satisfied to remain the same throughout \n", - "\n", - "\n" - ] - }, - { - "output_type": "stream", - "name": "stderr", - "text": [ - "[Succeeded / Failed / Skipped / Total] 1 / 3 / 1 / 5: 50%|█████ | 5/10 [01:28<01:28, 17.62s/it]" - ] - }, - { - "output_type": "stream", - "name": "stdout", - "text": [ - "--------------------------------------------- Result 5 
---------------------------------------------\n", - "[[Negative (98%)]] --> [[[FAILED]]]\n", - "\n", - "on the worst revenge-of-the-nerds clichés the filmmakers could dredge up \n", - "\n", - "\n" - ] - }, - { - "output_type": "stream", - "name": "stderr", - "text": [ - "\r[Succeeded / Failed / Skipped / Total] 1 / 4 / 1 / 6: 60%|██████ | 6/10 [01:28<00:59, 14.75s/it]" - ] - }, - { - "output_type": "stream", - "name": "stdout", - "text": [ - "--------------------------------------------- Result 6 ---------------------------------------------\n", - "[[Negative (99%)]] --> [[[FAILED]]]\n", - "\n", - "that 's far too tragic to merit such superficial treatment \n", - "\n", - "\n" - ] - }, - { - "output_type": "stream", - "name": "stderr", - "text": [ - "[Succeeded / Failed / Skipped / Total] 2 / 5 / 1 / 8: 80%|████████ | 8/10 [01:29<00:22, 11.24s/it]" - ] - }, - { - "output_type": "stream", - "name": "stdout", - "text": [ - "--------------------------------------------- Result 7 ---------------------------------------------\n", - "[[Positive (98%)]] --> [[Negative (62%)]]\n", - "\n", - "[[demonstrates]] that the [[director]] of such [[hollywood]] blockbusters as patriot games can still [[turn]] out a [[small]] , personal [[film]] with an emotional [[wallop]] . \n", - "\n", - "[[shows]] that the [[directors]] of such [[tinseltown]] blockbusters as patriot games can still [[turning]] out a [[tiny]] , personal [[movies]] with an emotional [[batting]] . \n", - "\n", - "\n", - "--------------------------------------------- Result 8 ---------------------------------------------\n", - "[[Positive (90%)]] --> [[[FAILED]]]\n", - "\n", - "of saucy \n", - "\n", - "\n" - ] - }, - { - "output_type": "stream", - "name": "stderr", - "text": [ - "[Succeeded / Failed / Skipped / Total] 2 / 6 / 1 / 9: 90%|█████████ | 9/10 [01:30<00:10, 10.03s/it]" - ] - }, - { - "output_type": "stream", - "name": "stdout", - "text": [ - "--------------------------------------------- Result 9 ---------------------------------------------\n", - "[[Negative (99%)]] --> [[[FAILED]]]\n", - "\n", - "a depressed fifteen-year-old 's suicidal poetry \n", - "\n", - "\n" - ] - }, - { - "output_type": "stream", - "name": "stderr", - "text": [ - "[Succeeded / Failed / Skipped / Total] 3 / 6 / 1 / 10: 100%|██████████| 10/10 [01:30<00:00, 9.05s/it]" - ] - }, - { - "output_type": "stream", - "name": "stdout", - "text": [ - "--------------------------------------------- Result 10 ---------------------------------------------\n", - "[[Positive (79%)]] --> [[Negative (65%)]]\n", - "\n", - "are more [[deeply]] thought through than in most ` right-thinking ' films \n", - "\n", - "are more [[seriously]] thought through than in most ` right-thinking ' films \n", - "\n", - "\n", - "\n", - "+-------------------------------+--------+\n", - "| Attack Results | |\n", - "+-------------------------------+--------+\n", - "| Number of successful attacks: | 3 |\n", - "| Number of failed attacks: | 6 |\n", - "| Number of skipped attacks: | 1 |\n", - "| Original accuracy: | 90.0% |\n", - "| Accuracy under attack: | 60.0% |\n", - "| Attack success rate: | 33.33% |\n", - "| Average perturbed word %: | 17.94% |\n", - "| Average num. 
words per input: | 9.5 |\n", - "| Avg num queries: | 35.11 |\n", - "+-------------------------------+--------+\n" - ] - }, - { - "output_type": "stream", - "name": "stderr", - "text": [ - "\n" - ] - }, - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "[,\n", - " ,\n", - " ,\n", - " ,\n", - " ,\n", - " ,\n", - " ,\n", - " ,\n", - " ,\n", - " ]" - ] - }, - "metadata": {}, - "execution_count": 9 - } + "text/plain": [ + " 0%| | 0/3 [00:00 compatible with goal function .\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Attack(\n", + " (search_method): GreedyWordSwapWIR(\n", + " (wir_method): delete\n", + " )\n", + " (goal_function): UntargetedClassification\n", + " (transformation): CompositeTransformation(\n", + " (0): WordSwapRandomCharacterInsertion(\n", + " (random_one): True\n", + " )\n", + " (1): WordSwapRandomCharacterDeletion(\n", + " (random_one): True\n", + " )\n", + " (2): WordSwapNeighboringCharacterSwap(\n", + " (random_one): True\n", + " )\n", + " (3): WordSwapHomoglyphSwap\n", + " (4): WordSwapEmbedding(\n", + " (max_candidates): 5\n", + " (embedding): WordEmbedding\n", + " )\n", + " )\n", + " (constraints): \n", + " (0): UniversalSentenceEncoder(\n", + " (metric): angular\n", + " (threshold): 0.8\n", + " (window_size): inf\n", + " (skip_text_shorter_than_window): False\n", + " (compare_against_original): True\n", + " )\n", + " (1): RepeatModification\n", + " (2): StopwordModification\n", + " (is_black_box): True\n", + ") \n", + "\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\r 0%| | 0/10 [00:00 [[Positive (93%)]]\n", + "\n", + "[[hide]] new secretions from the parental units \n", + "\n", + "[[concealing]] new secretions from the parental units \n", + "\n", + "\n", + "--------------------------------------------- Result 2 ---------------------------------------------\n", + "[[Negative (96%)]] --> [[[FAILED]]]\n", + "\n", + "contains no wit , only labored gags \n", + "\n", + "\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "[Succeeded / Failed / Skipped / Total] 1 / 2 / 1 / 4: 40%|████ | 4/10 [01:27<02:11, 21.91s/it]" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "--------------------------------------------- Result 3 ---------------------------------------------\n", + "[[Positive (100%)]] --> [[[FAILED]]]\n", + "\n", + "that loves its characters and communicates something rather beautiful about human nature \n", + "\n", + "\n", + "--------------------------------------------- Result 4 ---------------------------------------------\n", + "[[Positive (82%)]] --> [[[SKIPPED]]]\n", + "\n", + "remains utterly satisfied to remain the same throughout \n", + "\n", + "\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "[Succeeded / Failed / Skipped / Total] 1 / 3 / 1 / 5: 50%|█████ | 5/10 [01:28<01:28, 17.62s/it]" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "--------------------------------------------- Result 5 ---------------------------------------------\n", + "[[Negative (98%)]] --> [[[FAILED]]]\n", + "\n", + "on the worst revenge-of-the-nerds clichés the filmmakers could dredge up \n", + "\n", + "\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\r[Succeeded / Failed / Skipped / Total] 1 / 4 / 1 / 6: 60%|██████ | 6/10 [01:28<00:59, 14.75s/it]" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + 
"--------------------------------------------- Result 6 ---------------------------------------------\n", + "[[Negative (99%)]] --> [[[FAILED]]]\n", + "\n", + "that 's far too tragic to merit such superficial treatment \n", + "\n", + "\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "[Succeeded / Failed / Skipped / Total] 2 / 5 / 1 / 8: 80%|████████ | 8/10 [01:29<00:22, 11.24s/it]" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "--------------------------------------------- Result 7 ---------------------------------------------\n", + "[[Positive (98%)]] --> [[Negative (62%)]]\n", + "\n", + "[[demonstrates]] that the [[director]] of such [[hollywood]] blockbusters as patriot games can still [[turn]] out a [[small]] , personal [[film]] with an emotional [[wallop]] . \n", + "\n", + "[[shows]] that the [[directors]] of such [[tinseltown]] blockbusters as patriot games can still [[turning]] out a [[tiny]] , personal [[movies]] with an emotional [[batting]] . \n", + "\n", + "\n", + "--------------------------------------------- Result 8 ---------------------------------------------\n", + "[[Positive (90%)]] --> [[[FAILED]]]\n", + "\n", + "of saucy \n", + "\n", + "\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "[Succeeded / Failed / Skipped / Total] 2 / 6 / 1 / 9: 90%|█████████ | 9/10 [01:30<00:10, 10.03s/it]" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "--------------------------------------------- Result 9 ---------------------------------------------\n", + "[[Negative (99%)]] --> [[[FAILED]]]\n", + "\n", + "a depressed fifteen-year-old 's suicidal poetry \n", + "\n", + "\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "[Succeeded / Failed / Skipped / Total] 3 / 6 / 1 / 10: 100%|██████████| 10/10 [01:30<00:00, 9.05s/it]" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "--------------------------------------------- Result 10 ---------------------------------------------\n", + "[[Positive (79%)]] --> [[Negative (65%)]]\n", + "\n", + "are more [[deeply]] thought through than in most ` right-thinking ' films \n", + "\n", + "are more [[seriously]] thought through than in most ` right-thinking ' films \n", + "\n", + "\n", + "\n", + "+-------------------------------+--------+\n", + "| Attack Results | |\n", + "+-------------------------------+--------+\n", + "| Number of successful attacks: | 3 |\n", + "| Number of failed attacks: | 6 |\n", + "| Number of skipped attacks: | 1 |\n", + "| Original accuracy: | 90.0% |\n", + "| Accuracy under attack: | 60.0% |\n", + "| Attack success rate: | 33.33% |\n", + "| Average perturbed word %: | 17.94% |\n", + "| Average num. 
words per input: | 9.5 |\n", + "| Avg num queries: | 35.11 |\n", + "+-------------------------------+--------+\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\n" + ] + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "[,\n", + " ,\n", + " ,\n", + " ,\n", + " ,\n", + " ,\n", + " ,\n", + " ,\n", + " ,\n", + " ]" ] + }, + "metadata": {}, + "execution_count": 9 } - ] + ] + } + ] } \ No newline at end of file diff --git a/examples/reactive_defense/sst2_reactive_defense.py b/examples/reactive_defense/sst2_reactive_defense.py index 108e9a957..67a9f2ce4 100644 --- a/examples/reactive_defense/sst2_reactive_defense.py +++ b/examples/reactive_defense/sst2_reactive_defense.py @@ -69,13 +69,13 @@ # | Avg num queries: | 124.56 | # +-------------------------------+--------+ -recipe = GeneticAlgorithmAlzantot2018.build(model_wrapper) +# recipe = GeneticAlgorithmAlzantot2018.build(model_wrapper) # # recipe = BERTAttackLi2020.build(model_wrapper) # # recipe = FasterGeneticAlgorithmJia2019.build(model_wrapper) # -# recipe = DeepWordBugGao2018.build(model_wrapper) +recipe = DeepWordBugGao2018.build(model_wrapper) # +-------------------------------+--------+ # | Attack Results | | # +-------------------------------+--------+ @@ -132,5 +132,6 @@ # https://huggingface.co/spaces/yangheng/TAD # Ref repo: -# https://github.com/yangheng95/TextAttack + # https://github.com/yangheng95/PyABSA +# https://github.com/yangheng95/TextAttack diff --git a/textattack/attack.py b/textattack/attack.py index dcc4ef7be..629463919 100644 --- a/textattack/attack.py +++ b/textattack/attack.py @@ -382,7 +382,7 @@ def filter_transformations( filtered_texts.sort(key=lambda t: t.text) return filtered_texts - def _attack(self, initial_result): + def _attack(self, initial_result, **kwargs): """Calls the ``SearchMethod`` to perturb the ``AttackedText`` stored in ``initial_result``. @@ -393,7 +393,7 @@ def _attack(self, initial_result): A ``SuccessfulAttackResult``, ``FailedAttackResult``, or ``MaximizedAttackResult``. """ - final_result = self.search_method(initial_result) + final_result = self.search_method(initial_result, **kwargs) self.clear_cache() if final_result.goal_status == GoalFunctionResultStatus.SUCCEEDED: result = SuccessfulAttackResult( @@ -414,7 +414,7 @@ def _attack(self, initial_result): raise ValueError(f"Unrecognized goal status {final_result.goal_status}") return result - def attack(self, example, ground_truth_output): + def attack(self, example, ground_truth_output, **kwargs): """Attack a single example. 
Args: @@ -445,7 +445,7 @@ def attack(self, example, ground_truth_output): if goal_function_result.goal_status == GoalFunctionResultStatus.SKIPPED: return SkippedAttackResult(goal_function_result) else: - result = self._attack(goal_function_result) + result = self._attack(goal_function_result, **kwargs) return result def __repr__(self): diff --git a/textattack/dataset_args.py b/textattack/dataset_args.py index b34ea3e68..5d6bb6c7b 100644 --- a/textattack/dataset_args.py +++ b/textattack/dataset_args.py @@ -117,7 +117,16 @@ 5.0, ), "xlnet-base-cased-wnli": ("glue", "wnli", "validation"), + + # PyABSA models + "tadbert-ag-news": ("ag_news", None, "test"), + "tadbert-imdb": ("imdb", None, "test"), + "tadbert-sst2": ("glue", "sst2", "validation"), + "taddeberta-ag-news": ("ag_news", None, "test"), + "taddeberta-imdb": ("imdb", None, "test"), + "taddeberta-sst2": ("glue", "sst2", "validation"), } + TEXTATTACK_DATASET_BY_MODEL = { # # LSTMs diff --git a/textattack/reactive_defense/reactive_defender.py b/textattack/reactive_defense/reactive_defender.py index a96b6c692..875f8cfb0 100644 --- a/textattack/reactive_defense/reactive_defender.py +++ b/textattack/reactive_defense/reactive_defender.py @@ -16,5 +16,9 @@ class ReactiveDefender(ReprMixin, ABC): def __init__(self, **kwargs): pass - def reactive_defense(self, **kwargs): + def warn_adversary(self, **kwargs): + pass + + + def repair(self, **kwargs): pass diff --git a/textattack/reactive_defense/tad_reactive_defender.py b/textattack/reactive_defense/tad_reactive_defender.py index f3e678981..e8472993a 100644 --- a/textattack/reactive_defense/tad_reactive_defender.py +++ b/textattack/reactive_defense/tad_reactive_defender.py @@ -28,6 +28,6 @@ def __init__(self, ckpt='tad-sst2', **kwargs): self.tad_classifier = TADCheckpointManager.get_tad_text_classifier(checkpoint=PYABSA_MODELS[ckpt], auto_device=True) - def reactive_defense(self, text, **kwargs): + def repair(self, text, **kwargs): res = self.tad_classifier.infer(text, defense='pwws', print_result=False, **kwargs) return res diff --git a/textattack/search_methods/search_method.py b/textattack/search_methods/search_method.py index 76b5981bc..ba574ec4b 100644 --- a/textattack/search_methods/search_method.py +++ b/textattack/search_methods/search_method.py @@ -6,6 +6,7 @@ from abc import ABC, abstractmethod +from textattack.shared import AttackedText from textattack.shared.utils import ReprMixin @@ -17,7 +18,7 @@ class SearchMethod(ReprMixin, ABC): goal is met or the search is exhausted. 
""" - def __call__(self, initial_result): + def __call__(self, initial_result, **kwargs): """Ensures access to necessary functions, then calls ``perform_search``""" if not hasattr(self, "get_transformations"): @@ -34,6 +35,17 @@ def __call__(self, initial_result): ) result = self.perform_search(initial_result) + + # The interface for apply adversarial defense, which is able to evaluate adversarial defense's performance + reactive_defender = kwargs.get('reactive_defender', None) + if reactive_defender is not None: + # for pyabsa-based defense + repaired_res = reactive_defender.repair(result.attacked_text.text) + # set the repaired result to the goal function result + # result.restored_text = AttackedText(repaired_res['restored_text'], result.attacked_text.attack_attrs) + result.output = int(repaired_res['label']) # use int() as pyabsa only returns string label, this may cause problem in the future + if result.output == result.ground_truth_output: + result.goal_status = 1 # ensure that the number of queries for this GoalFunctionResult is up-to-date result.num_queries = self.goal_function.num_queries return result From 743e22eefc71a4f94935efd6d3a313ea6a31d39c Mon Sep 17 00:00:00 2001 From: yangheng95 Date: Fri, 11 Nov 2022 17:06:09 +0000 Subject: [PATCH 03/12] reformat_code --- .../reactive_defense/sst2_reactive_defense.py | 24 +++--- textattack/attacker.py | 12 +-- ...multilingual_universal_sentence_encoder.py | 8 +- .../universal_sentence_encoder.py | 4 +- textattack/dataset_args.py | 1 - textattack/model_args.py | 13 ++- .../models/wrappers/pyabsa_model_wrapper.py | 12 +-- .../reactive_defense/reactive_defender.py | 2 - .../reactive_defense/tad_reactive_defender.py | 21 +++-- textattack/search_methods/search_method.py | 6 +- textattack/trainer.py | 86 +++++++++---------- 11 files changed, 99 insertions(+), 90 deletions(-) diff --git a/examples/reactive_defense/sst2_reactive_defense.py b/examples/reactive_defense/sst2_reactive_defense.py index 67a9f2ce4..f3aab40c3 100644 --- a/examples/reactive_defense/sst2_reactive_defense.py +++ b/examples/reactive_defense/sst2_reactive_defense.py @@ -1,23 +1,25 @@ import os from textattack import Attacker -from textattack.attack_recipes import (PWWSRen2019, - BAEGarg2019, - TextFoolerJin2019, - BERTAttackLi2020, - GeneticAlgorithmAlzantot2018, - CLARE2020, - FasterGeneticAlgorithmJia2019, - DeepWordBugGao2018, - PSOZang2020) +from textattack.attack_recipes import ( + PWWSRen2019, + BAEGarg2019, + TextFoolerJin2019, + BERTAttackLi2020, + GeneticAlgorithmAlzantot2018, + CLARE2020, + FasterGeneticAlgorithmJia2019, + DeepWordBugGao2018, + PSOZang2020, +) from textattack.datasets import HuggingFaceDataset from textattack.models.wrappers import TADModelWrapper from textattack.reactive_defense.tad_reactive_defender import TADReactiveDefender -dataset = HuggingFaceDataset("glue", subset='sst2', split="validation") +dataset = HuggingFaceDataset("glue", subset="sst2", split="validation") # init reactive_defender to post fix attacked result -reactive_defender = TADReactiveDefender('taddeberta-sst2') +reactive_defender = TADReactiveDefender("taddeberta-sst2") # use the based tad_classifier (without defense) to test target_model = reactive_defender.tad_classifier diff --git a/textattack/attacker.py b/textattack/attacker.py index c49806f44..d26e52620 100644 --- a/textattack/attacker.py +++ b/textattack/attacker.py @@ -105,8 +105,8 @@ def _get_worklist(self, start, end, num_examples, shuffle): def simple_attack(self, text, label): """Internal method that carries out 
attack. - No parallel processing is involved. - """ + No parallel processing is involved. + """ if torch.cuda.is_available(): self.attack.cuda_() @@ -120,9 +120,11 @@ def simple_attack(self, text, label): except Exception as e: raise e # return - if (isinstance(result, SkippedAttackResult) and self.attack_args.attack_n) or ( - not isinstance(result, SuccessfulAttackResult) - and self.attack_args.num_successful_examples + if ( + isinstance(result, SkippedAttackResult) and self.attack_args.attack_n + ) or ( + not isinstance(result, SuccessfulAttackResult) + and self.attack_args.num_successful_examples ): return else: diff --git a/textattack/constraints/semantics/sentence_encoders/universal_sentence_encoder/multilingual_universal_sentence_encoder.py b/textattack/constraints/semantics/sentence_encoders/universal_sentence_encoder/multilingual_universal_sentence_encoder.py index 20381e2bc..f450cb7b2 100644 --- a/textattack/constraints/semantics/sentence_encoders/universal_sentence_encoder/multilingual_universal_sentence_encoder.py +++ b/textattack/constraints/semantics/sentence_encoders/universal_sentence_encoder/multilingual_universal_sentence_encoder.py @@ -22,12 +22,8 @@ def __init__(self, threshold=0.8, large=False, metric="angular", **kwargs): tfhub_url = "https://tfhub.dev/google/universal-sentence-encoder-multilingual-large/3" mirror_tfhub_url = "https://hub.tensorflow.google.cn/google/universal-sentence-encoder-multilingual-large/3" else: - tfhub_url = ( - "https://https://tfhub.dev/google/universal-sentence-encoder-multilingual/3" - ) - mirror_tfhub_url = ( - "https://hub.tensorflow.google.cn/google/universal-sentence-encoder-multilingual/3" - ) + tfhub_url = "https://https://tfhub.dev/google/universal-sentence-encoder-multilingual/3" + mirror_tfhub_url = "https://hub.tensorflow.google.cn/google/universal-sentence-encoder-multilingual/3" # TODO add QA SET. 
Details at: https://hub.tensorflow.google.cn/google/universal-sentence-encoder-multilingual-qa/3 self._tfhub_url = tfhub_url diff --git a/textattack/constraints/semantics/sentence_encoders/universal_sentence_encoder/universal_sentence_encoder.py b/textattack/constraints/semantics/sentence_encoders/universal_sentence_encoder/universal_sentence_encoder.py index 1088eb003..994250a37 100644 --- a/textattack/constraints/semantics/sentence_encoders/universal_sentence_encoder/universal_sentence_encoder.py +++ b/textattack/constraints/semantics/sentence_encoders/universal_sentence_encoder/universal_sentence_encoder.py @@ -21,7 +21,9 @@ def __init__(self, threshold=0.8, large=False, metric="angular", **kwargs): mirror_tfhub_url = "https://hub.tensorflow.google.cn/google/universal-sentence-encoder-large/5" else: tfhub_url = "https://tfhub.dev/google/universal-sentence-encoder/4" - mirror_tfhub_url = "https://hub.tensorflow.google.cn/google/universal-sentence-encoder/4" + mirror_tfhub_url = ( + "https://hub.tensorflow.google.cn/google/universal-sentence-encoder/4" + ) self._tfhub_url = tfhub_url self.mirror_tfhub_url = mirror_tfhub_url diff --git a/textattack/dataset_args.py b/textattack/dataset_args.py index 5d6bb6c7b..c2669ade9 100644 --- a/textattack/dataset_args.py +++ b/textattack/dataset_args.py @@ -117,7 +117,6 @@ 5.0, ), "xlnet-base-cased-wnli": ("glue", "wnli", "validation"), - # PyABSA models "tadbert-ag-news": ("ag_news", None, "test"), "tadbert-imdb": ("imdb", None, "test"), diff --git a/textattack/model_args.py b/textattack/model_args.py index e9607cb4c..237db0220 100644 --- a/textattack/model_args.py +++ b/textattack/model_args.py @@ -152,7 +152,11 @@ def _add_parser_args(cls, parser): """Adds model-related arguments to an argparser.""" model_group = parser.add_mutually_exclusive_group() - model_names = list(HUGGINGFACE_MODELS.keys()) + list(TEXTATTACK_MODELS.keys()) + list(PYABSA_MODELS.keys()) + model_names = ( + list(HUGGINGFACE_MODELS.keys()) + + list(TEXTATTACK_MODELS.keys()) + + list(PYABSA_MODELS.keys()) + ) model_group.add_argument( "--model", type=str, @@ -237,16 +241,17 @@ def _create_model_from_args(cls, args): model = textattack.models.wrappers.HuggingFaceModelWrapper(model, tokenizer) elif args.model in PYABSA_MODELS: from pyabsa import TADCheckpointManager + colored_model_name = textattack.shared.utils.color_text( args.model, color="blue", method="ansi" ) textattack.shared.logger.info( f"Loading pre-trained TAD model from https://github.com/yangheng95/PyABSA: {colored_model_name}" ) - model = TADCheckpointManager.get_tad_text_classifier(checkpoint=PYABSA_MODELS[args.model], auto_device=True) - model = textattack.models.wrappers.TADModelWrapper( - model + model = TADCheckpointManager.get_tad_text_classifier( + checkpoint=PYABSA_MODELS[args.model], auto_device=True ) + model = textattack.models.wrappers.TADModelWrapper(model) elif args.model in TEXTATTACK_MODELS: # Support loading TextAttack pre-trained models via just a keyword. 
colored_model_name = textattack.shared.utils.color_text( diff --git a/textattack/models/wrappers/pyabsa_model_wrapper.py b/textattack/models/wrappers/pyabsa_model_wrapper.py index a62b4adcd..001b5bc34 100644 --- a/textattack/models/wrappers/pyabsa_model_wrapper.py +++ b/textattack/models/wrappers/pyabsa_model_wrapper.py @@ -8,14 +8,14 @@ class TADModelWrapper(HuggingFaceModelWrapper): - """ Transformers sentiment analysis pipeline returns a list of responses - like + """Transformers sentiment analysis pipeline returns a list of responses + like - [{'label': 'POSITIVE', 'score': 0.7817379832267761}] + [{'label': 'POSITIVE', 'score': 0.7817379832267761}] - We need to convert that to a format TextAttack understands, like + We need to convert that to a format TextAttack understands, like - [[0.218262017, 0.7817379832267761] + [[0.218262017, 0.7817379832267761] """ def __init__(self, model): @@ -25,6 +25,6 @@ def __call__(self, text_inputs, **kwargs): outputs = [] for text_input in text_inputs: raw_outputs = self.model.infer(text_input, print_result=False, **kwargs) - outputs.append(raw_outputs['probs']) + outputs.append(raw_outputs["probs"]) return outputs diff --git a/textattack/reactive_defense/reactive_defender.py b/textattack/reactive_defense/reactive_defender.py index 875f8cfb0..3aca4f20a 100644 --- a/textattack/reactive_defense/reactive_defender.py +++ b/textattack/reactive_defense/reactive_defender.py @@ -12,13 +12,11 @@ class ReactiveDefender(ReprMixin, ABC): - def __init__(self, **kwargs): pass def warn_adversary(self, **kwargs): pass - def repair(self, **kwargs): pass diff --git a/textattack/reactive_defense/tad_reactive_defender.py b/textattack/reactive_defense/tad_reactive_defender.py index e8472993a..f2f6ea81b 100644 --- a/textattack/reactive_defense/tad_reactive_defender.py +++ b/textattack/reactive_defense/tad_reactive_defender.py @@ -13,21 +13,24 @@ class TADReactiveDefender(ReactiveDefender): - """ Transformers sentiment analysis pipeline returns a list of responses - like + """Transformers sentiment analysis pipeline returns a list of responses + like - [{'label': 'POSITIVE', 'score': 0.7817379832267761}] + [{'label': 'POSITIVE', 'score': 0.7817379832267761}] - We need to convert that to a format TextAttack understands, like + We need to convert that to a format TextAttack understands, like - [[0.218262017, 0.7817379832267761] + [[0.218262017, 0.7817379832267761] """ - def __init__(self, ckpt='tad-sst2', **kwargs): + def __init__(self, ckpt="tad-sst2", **kwargs): super().__init__(**kwargs) - self.tad_classifier = TADCheckpointManager.get_tad_text_classifier(checkpoint=PYABSA_MODELS[ckpt], - auto_device=True) + self.tad_classifier = TADCheckpointManager.get_tad_text_classifier( + checkpoint=PYABSA_MODELS[ckpt], auto_device=True + ) def repair(self, text, **kwargs): - res = self.tad_classifier.infer(text, defense='pwws', print_result=False, **kwargs) + res = self.tad_classifier.infer( + text, defense="pwws", print_result=False, **kwargs + ) return res diff --git a/textattack/search_methods/search_method.py b/textattack/search_methods/search_method.py index ba574ec4b..b11060891 100644 --- a/textattack/search_methods/search_method.py +++ b/textattack/search_methods/search_method.py @@ -37,13 +37,15 @@ def __call__(self, initial_result, **kwargs): result = self.perform_search(initial_result) # The interface for apply adversarial defense, which is able to evaluate adversarial defense's performance - reactive_defender = kwargs.get('reactive_defender', None) + reactive_defender = 
kwargs.get("reactive_defender", None) if reactive_defender is not None: # for pyabsa-based defense repaired_res = reactive_defender.repair(result.attacked_text.text) # set the repaired result to the goal function result # result.restored_text = AttackedText(repaired_res['restored_text'], result.attacked_text.attack_attrs) - result.output = int(repaired_res['label']) # use int() as pyabsa only returns string label, this may cause problem in the future + result.output = int( + repaired_res["label"] + ) # use int() as pyabsa only returns string label, this may cause problem in the future if result.output == result.ground_truth_output: result.goal_status = 1 # ensure that the number of queries for this GoalFunctionResult is up-to-date diff --git a/textattack/trainer.py b/textattack/trainer.py index 77b47c6cd..9c3198ae3 100644 --- a/textattack/trainer.py +++ b/textattack/trainer.py @@ -89,13 +89,13 @@ class Trainer: """ def __init__( - self, - model_wrapper, - task_type="classification", - attack=None, - train_dataset=None, - eval_dataset=None, - training_args=None, + self, + model_wrapper, + task_type="classification", + attack=None, + train_dataset=None, + eval_dataset=None, + training_args=None, ): assert isinstance( model_wrapper, ModelWrapper @@ -164,7 +164,7 @@ def __init__( def _generate_adversarial_examples(self, epoch): """Generate adversarial examples using attacker.""" assert ( - self.attack is not None + self.attack is not None ), "`attack` is `None` but attempting to generate adversarial examples." base_file_name = f"attack-train-{epoch}" log_file_name = os.path.join(self.training_args.output_dir, base_file_name) @@ -217,7 +217,7 @@ def _generate_adversarial_examples(self, epoch): attack_types = collections.Counter(r.__class__.__name__ for r in results) total_attacks = ( - attack_types["SuccessfulAttackResult"] + attack_types["FailedAttackResult"] + attack_types["SuccessfulAttackResult"] + attack_types["FailedAttackResult"] ) success_rate = attack_types["SuccessfulAttackResult"] / total_attacks * 100 logger.info(f"Total number of attack results: {len(results)}") @@ -251,7 +251,7 @@ def _generate_adversarial_examples(self, epoch): return adversarial_dataset def _print_training_args( - self, total_training_steps, train_batch_size, num_clean_epochs + self, total_training_steps, train_batch_size, num_clean_epochs ): logger.info("***** Running training *****") logger.info(f" Num examples = {len(self.train_dataset)}") @@ -269,7 +269,7 @@ def _print_training_args( logger.info(f" Total optimization steps = {total_training_steps}") def _save_model_checkpoint( - self, model, tokenizer, step=None, epoch=None, best=False, last=False + self, model, tokenizer, step=None, epoch=None, best=False, last=False ): # Save model checkpoint if step: @@ -514,8 +514,8 @@ def training_step(self, model, tokenizer, batch): targets = targets.to(textattack.shared.utils.device) if isinstance(model, transformers.PreTrainedModel) or ( - isinstance(model, torch.nn.DataParallel) - and isinstance(model.module, transformers.PreTrainedModel) + isinstance(model, torch.nn.DataParallel) + and isinstance(model.module, transformers.PreTrainedModel) ): input_ids = tokenizer( input_texts, @@ -639,11 +639,11 @@ def train(self): num_clean_epochs = self.training_args.num_clean_epochs total_clean_training_steps = ( - math.ceil( - len(self.train_dataset) - / (train_batch_size * self.training_args.gradient_accumulation_steps) - ) - * num_clean_epochs + math.ceil( + len(self.train_dataset) + / (train_batch_size * 
self.training_args.gradient_accumulation_steps) + ) + * num_clean_epochs ) # calculate total_adv_training_data_length based on type of @@ -651,13 +651,13 @@ def train(self): # if num_train_adv_examples is float , num_train_adv_examples is a portion of train_dataset. if isinstance(self.training_args.num_train_adv_examples, float): total_adv_training_data_length = ( - len(self.train_dataset) * self.training_args.num_train_adv_examples + len(self.train_dataset) * self.training_args.num_train_adv_examples ) # if num_train_adv_examples is int and >=0 then it is taken as value. elif ( - isinstance(self.training_args.num_train_adv_examples, int) - and self.training_args.num_train_adv_examples >= 0 + isinstance(self.training_args.num_train_adv_examples, int) + and self.training_args.num_train_adv_examples >= 0 ): total_adv_training_data_length = self.training_args.num_train_adv_examples @@ -702,7 +702,7 @@ def train(self): if self.attack and epoch > num_clean_epochs: if ( - epoch - num_clean_epochs - 1 + epoch - num_clean_epochs - 1 ) % self.training_args.attack_epoch_interval == 0: # only generate a new adversarial training set every self.training_args.attack_period epochs after the clean epochs # adv_dataset is instance of `textattack.datasets.Dataset` @@ -759,7 +759,7 @@ def train(self): # TODO: Better way to handle TB and Wandb logging if (self._global_step > 0) and ( - self._global_step % self.training_args.logging_interval_step == 0 + self._global_step % self.training_args.logging_interval_step == 0 ): lr_to_log = ( scheduler.get_last_lr()[0] @@ -788,12 +788,12 @@ def train(self): # Save model checkpoint to file. if self.training_args.checkpoint_interval_steps: if ( - self._global_step > 0 - and ( + self._global_step > 0 + and ( self._global_step % self.training_args.checkpoint_interval_steps - ) - == 0 + ) + == 0 ): self._save_model_checkpoint( model, tokenizer, step=self._global_step @@ -835,8 +835,8 @@ def train(self): ) if ( - self.training_args.checkpoint_interval_epochs - and (epoch % self.training_args.checkpoint_interval_epochs) == 0 + self.training_args.checkpoint_interval_epochs + and (epoch % self.training_args.checkpoint_interval_epochs) == 0 ): self._save_model_checkpoint(model, tokenizer, epoch=epoch) @@ -851,8 +851,8 @@ def train(self): else: epochs_since_best_eval_score += 1 if self.training_args.early_stopping_epochs and ( - epochs_since_best_eval_score - > self.training_args.early_stopping_epochs + epochs_since_best_eval_score + > self.training_args.early_stopping_epochs ): logger.info( f"Stopping early since it's been {self.training_args.early_stopping_epochs} steps since validation score increased." @@ -932,8 +932,8 @@ def _write_readme(self, best_eval_score, best_eval_score_epoch, train_batch_size model_name = self.training_args.model_name_or_path elif isinstance(self.model_wrapper.model, transformers.PreTrainedModel): if ( - hasattr(self.model_wrapper.model.config, "_name_or_path") - and self.model_wrapper.model.config._name_or_path in HUGGINGFACE_MODELS + hasattr(self.model_wrapper.model.config, "_name_or_path") + and self.model_wrapper.model.config._name_or_path in HUGGINGFACE_MODELS ): # TODO Better way than just checking HUGGINGFACE_MODELS ? 
model_name = self.model_wrapper.model.config._name_or_path @@ -948,17 +948,17 @@ def _write_readme(self, best_eval_score, best_eval_score_epoch, train_batch_size model_name = f"`{model_name}`" if ( - isinstance(self.training_args, CommandLineTrainingArgs) - and self.training_args.model_max_length + isinstance(self.training_args, CommandLineTrainingArgs) + and self.training_args.model_max_length ): model_max_length = self.training_args.model_max_length elif isinstance( - self.model_wrapper.model, - ( - transformers.PreTrainedModel, - LSTMForClassification, - WordCNNForClassification, - ), + self.model_wrapper.model, + ( + transformers.PreTrainedModel, + LSTMForClassification, + WordCNNForClassification, + ), ): model_max_length = self.model_wrapper.tokenizer.model_max_length else: @@ -970,13 +970,13 @@ def _write_readme(self, best_eval_score, best_eval_score_epoch, train_batch_size model_max_length_str = "" if isinstance( - self.train_dataset, textattack.datasets.HuggingFaceDataset + self.train_dataset, textattack.datasets.HuggingFaceDataset ) and hasattr(self.train_dataset, "_name"): dataset_name = self.train_dataset._name if hasattr(self.train_dataset, "_subset"): dataset_name += f" ({self.train_dataset._subset})" elif isinstance( - self.eval_dataset, textattack.datasets.HuggingFaceDataset + self.eval_dataset, textattack.datasets.HuggingFaceDataset ) and hasattr(self.eval_dataset, "_name"): dataset_name = self.eval_dataset._name if hasattr(self.eval_dataset, "_subset"): From 3991ddcb0b06aaf9cb73551db5856bf8b063c611 Mon Sep 17 00:00:00 2001 From: yangheng95 Date: Fri, 11 Nov 2022 19:07:59 +0000 Subject: [PATCH 04/12] refomrat --- .../reactive_defense/sst2_reactive_defense.py | 18 +++++++++--------- .../multilingual_universal_sentence_encoder.py | 6 ++++-- .../universal_sentence_encoder.py | 6 ++++-- 3 files changed, 17 insertions(+), 13 deletions(-) diff --git a/examples/reactive_defense/sst2_reactive_defense.py b/examples/reactive_defense/sst2_reactive_defense.py index f3aab40c3..722094edd 100644 --- a/examples/reactive_defense/sst2_reactive_defense.py +++ b/examples/reactive_defense/sst2_reactive_defense.py @@ -1,16 +1,16 @@ -import os +# import os from textattack import Attacker from textattack.attack_recipes import ( - PWWSRen2019, - BAEGarg2019, - TextFoolerJin2019, - BERTAttackLi2020, - GeneticAlgorithmAlzantot2018, - CLARE2020, - FasterGeneticAlgorithmJia2019, + # PWWSRen2019, + # BAEGarg2019, + # TextFoolerJin2019, + # BERTAttackLi2020, + # GeneticAlgorithmAlzantot2018, + # CLARE2020, + # FasterGeneticAlgorithmJia2019, DeepWordBugGao2018, - PSOZang2020, + # PSOZang2020, ) from textattack.datasets import HuggingFaceDataset from textattack.models.wrappers import TADModelWrapper diff --git a/textattack/constraints/semantics/sentence_encoders/universal_sentence_encoder/multilingual_universal_sentence_encoder.py b/textattack/constraints/semantics/sentence_encoders/universal_sentence_encoder/multilingual_universal_sentence_encoder.py index f450cb7b2..06dd84781 100644 --- a/textattack/constraints/semantics/sentence_encoders/universal_sentence_encoder/multilingual_universal_sentence_encoder.py +++ b/textattack/constraints/semantics/sentence_encoders/universal_sentence_encoder/multilingual_universal_sentence_encoder.py @@ -30,7 +30,8 @@ def __init__(self, threshold=0.8, large=False, metric="angular", **kwargs): self.mirror_tfhub_url = mirror_tfhub_url try: self.model = hub.load(self._tfhub_url) - except: + except Exception as e: + print('Error loading model from tfhub, trying mirror url') 
self.model = hub.load(self.mirror_tfhub_url) def encode(self, sentences): @@ -45,5 +46,6 @@ def __setstate__(self, state): self.__dict__ = state try: self.model = hub.load(self._tfhub_url) - except: + except Exception as e: + print('Error loading model from tfhub, trying mirror url') self.model = hub.load(self.mirror_tfhub_url) diff --git a/textattack/constraints/semantics/sentence_encoders/universal_sentence_encoder/universal_sentence_encoder.py b/textattack/constraints/semantics/sentence_encoders/universal_sentence_encoder/universal_sentence_encoder.py index 994250a37..6b8327a7f 100644 --- a/textattack/constraints/semantics/sentence_encoders/universal_sentence_encoder/universal_sentence_encoder.py +++ b/textattack/constraints/semantics/sentence_encoders/universal_sentence_encoder/universal_sentence_encoder.py @@ -34,7 +34,8 @@ def encode(self, sentences): if not self.model: try: self.model = hub.load(self._tfhub_url) - except: + except Exception as e: + print('Error loading model from tfhub, trying mirror url') self.model = hub.load(self.mirror_tfhub_url) return self.model(sentences).numpy() @@ -47,5 +48,6 @@ def __setstate__(self, state): self.__dict__ = state try: self.model = hub.load(self._tfhub_url) - except: + except Exception as e: + print('Error loading model from tfhub, trying mirror url') self.model = hub.load(self.mirror_tfhub_url) From 7e2eea1f07be94479ee4cd2a64d0e6a05dc4e546 Mon Sep 17 00:00:00 2001 From: yangheng95 Date: Tue, 15 Nov 2022 16:27:14 +0000 Subject: [PATCH 05/12] freeze_pyabsa_version --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 61f0870e5..62567b3f1 100644 --- a/requirements.txt +++ b/requirements.txt @@ -23,4 +23,4 @@ jieba OpenHowNet pycld2 click<8.1.0 -pyabsa>=1.16.16 +pyabsa>=1.16.16,<2.0.0 From 13a317b3e5e081f8f49ef459796965dcabd3aead Mon Sep 17 00:00:00 2001 From: yangheng95 Date: Sat, 19 Nov 2022 14:16:52 +0000 Subject: [PATCH 06/12] update_pyabsa_version --- examples/reactive_defense/checkpoints-v2.0.json | 1 + requirements.txt | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) create mode 100644 examples/reactive_defense/checkpoints-v2.0.json diff --git a/examples/reactive_defense/checkpoints-v2.0.json b/examples/reactive_defense/checkpoints-v2.0.json new file mode 100644 index 000000000..36aaaa9cd --- /dev/null +++ b/examples/reactive_defense/checkpoints-v2.0.json @@ -0,0 +1 @@ +{"2.0.0": {"APC": {"multilingual": {"id": "", "Training Model": "FAST-LSA-T-V2-Deberta", "Training Dataset": "APCDatasetList.Multilingual", "Language": "Multilingual", "Description": "Trained on RTX3090", "Available Version": "1.10.5+", "Checkpoint File": "fast_lcf_bert_Multilingual_acc_82.66_f1_82.06.zip", "Author": "H, Yang (hy345@exeter.ac.uk)"}, "english": {"id": "", "Training Model": "FAST-LSA-T-V2-Deberta", "Training Dataset": "APCDatasetList.English", "Language": "English", "Description": "Trained on RTX3090", "Available Version": "1.10.5+", "Checkpoint File": "fast_lsa_t_v2_English_acc_82.21_f1_81.81.zip", "Author": "H, Yang (hy345@exeter.ac.uk)"}, "chinese": {"id": "", "Training Model": "FAST-LSA-T-V2-Deberta", "Training Dataset": "APCDatasetList.Chinese", "Language": "Chinese", "Description": "Trained on RTX3090", "Available Version": "1.10.5+", "Checkpoint File": "fast_lsa_t_v2_Chinese_acc_96.0_f1_95.1.zip", "Author": "H, Yang (hy345@exeter.ac.uk)"}}, "ATEPC": {"multilingual": {"id": "", "Training Model": "FAST-LCF-ATEPC", "Training Dataset": "ABSADatasets.Multilingual", 
"Language": "Multilingual", "Description": "Trained on RTX3090", "Available Version": "1.16.0+", "Checkpoint File": "fast_lcf_atepc_Multilingual_cdw_apcacc_78.08_apcf1_77.81_atef1_75.41.zip", "Author": "H, Yang (hy345@exeter.ac.uk)"}, "english": {"id": "", "Training Model": "FAST-LCF-ATEPC", "Training Dataset": "ATEPCDatasetList.English", "Language": "English", "Description": "Trained on RTX3090", "Available Version": "1.10.5+", "Checkpoint File": "fast_lcf_atepc_English_cdw_apcacc_82.36_apcf1_81.89_atef1_75.43.zip", "Author": "H, Yang (hy345@exeter.ac.uk)"}, "chinese": {"id": "", "Training Model": "FAST-LCF-ATEPC", "Training Dataset": "ATEPCDatasetList.Chinese", "Language": "Chinese", "Description": "Trained on RTX3090", "Available Version": "1.10.5+", "Checkpoint File": "fast_lcf_atepc_Chinese_cdw_apcacc_96.22_apcf1_95.32_atef1_78.73.zip", "Author": "H, Yang (hy345@exeter.ac.uk)"}}, "RNAC": {"degrad_lstm": {"id": "", "Training Model": "LSTM", "Training Dataset": "ABSADatasets.Multilingual", "Language": "RNA", "Description": "Trained on RTX3090", "Available Version": "1.16.0+", "Checkpoint File": "lstm_degrad_acc_85.26_f1_84.62.zip", "Author": "H, Yang (hy345@exeter.ac.uk)"}, "degrad_bert": {"id": "", "Training Model": "MLP", "Training Dataset": "Degrad", "Language": "RNA", "Description": "Trained on RTX3090", "Available Version": "1.16.0+", "Checkpoint File": "bert_mlp_degrad_acc_87.44_f1_86.99.zip", "Author": "H, Yang (hy345@exeter.ac.uk)"}}, "TAD": {"tad-sst2": {"id": "", "Training Model": "TAD", "Training Dataset": "SST2", "Language": "English", "Description": "Trained on RTX3090", "Available Version": "1.15+", "Checkpoint File": "TAD-SST2.zip", "Author": "H, Yang (yangheng@m.scnu.edu.cn)"}, "tad-agnews10k": {"id": "", "Training Model": "TAD", "Training Dataset": "AGNews", "Language": "English", "Description": "Trained on RTX3090", "Available Version": "1.15+", "Checkpoint File": "TAD-AGNews10K.zip", "Author": "H, Yang (yangheng@m.scnu.edu.cn)"}, "tad-amazon": {"id": "", "Training Model": "TAD", "Training Dataset": "AGNews", "Language": "English", "Description": "Trained on RTX3090", "Available Version": "1.15+", "Checkpoint File": "TAD-Amazon.zip", "Author": "H, Yang (yangheng@m.scnu.edu.cn)"}}}} \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 62567b3f1..80f582ec5 100644 --- a/requirements.txt +++ b/requirements.txt @@ -23,4 +23,4 @@ jieba OpenHowNet pycld2 click<8.1.0 -pyabsa>=1.16.16,<2.0.0 +pyabsa>=2.0.6 From 6176650b12c75f4bdc96e54832249f03c5985909 Mon Sep 17 00:00:00 2001 From: HENG Date: Thu, 31 Aug 2023 00:45:57 +0100 Subject: [PATCH 07/12] reformat --- examples/attack/attack_keras_parallel.py | 1 - textattack/attack_recipes/morpheus_tan_2020.py | 1 - textattack/attack_recipes/seq2sick_cheng_2018_blackbox.py | 1 - textattack/commands/augment_command.py | 1 - textattack/commands/eval_model_command.py | 2 +- textattack/constraints/overlap/max_words_perturbed.py | 1 - .../multilingual_universal_sentence_encoder.py | 4 ++-- .../universal_sentence_encoder/universal_sentence_encoder.py | 4 ++-- .../classification_goal_function_result.py | 1 - .../text_to_text_goal_function_result.py | 1 - textattack/loggers/weights_and_biases_logger.py | 1 - textattack/metrics/quality_metrics/perplexity.py | 1 - textattack/search_methods/greedy_word_swap_wir.py | 1 - textattack/shared/attacked_text.py | 1 + textattack/shared/validators.py | 5 ++++- textattack/trainer.py | 1 - textattack/training_args.py | 1 - .../transformations/word_swaps/word_swap_change_name.py 
| 1 - .../transformations/word_swaps/word_swap_change_number.py | 2 +- 19 files changed, 11 insertions(+), 20 deletions(-) diff --git a/examples/attack/attack_keras_parallel.py b/examples/attack/attack_keras_parallel.py index f05fcc2a5..617e08422 100644 --- a/examples/attack/attack_keras_parallel.py +++ b/examples/attack/attack_keras_parallel.py @@ -70,7 +70,6 @@ def __init__(self, model): self.model = model def __call__(self, text_input_list): - x_transform = [] for i, review in enumerate(text_input_list): tokens = [x.strip(",") for x in review.split()] diff --git a/textattack/attack_recipes/morpheus_tan_2020.py b/textattack/attack_recipes/morpheus_tan_2020.py index edf8ae790..b98360a53 100644 --- a/textattack/attack_recipes/morpheus_tan_2020.py +++ b/textattack/attack_recipes/morpheus_tan_2020.py @@ -27,7 +27,6 @@ class MorpheusTan2020(AttackRecipe): @staticmethod def build(model_wrapper): - # # Goal is to minimize BLEU score between the model output given for the # perturbed input sequence and the reference translation diff --git a/textattack/attack_recipes/seq2sick_cheng_2018_blackbox.py b/textattack/attack_recipes/seq2sick_cheng_2018_blackbox.py index de800c522..86b79aa23 100644 --- a/textattack/attack_recipes/seq2sick_cheng_2018_blackbox.py +++ b/textattack/attack_recipes/seq2sick_cheng_2018_blackbox.py @@ -31,7 +31,6 @@ class Seq2SickCheng2018BlackBox(AttackRecipe): @staticmethod def build(model_wrapper, goal_function="non_overlapping"): - # # Goal is non-overlapping output. # diff --git a/textattack/commands/augment_command.py b/textattack/commands/augment_command.py index 118fe0150..2883ded76 100644 --- a/textattack/commands/augment_command.py +++ b/textattack/commands/augment_command.py @@ -32,7 +32,6 @@ def run(self, args): args = textattack.AugmenterArgs(**vars(args)) if args.interactive: - print("\nRunning in interactive mode...\n") augmenter = eval(AUGMENTATION_RECIPE_NAMES[args.recipe])( pct_words_to_swap=args.pct_words_to_swap, diff --git a/textattack/commands/eval_model_command.py b/textattack/commands/eval_model_command.py index 16cbfd2fa..7957fbfee 100644 --- a/textattack/commands/eval_model_command.py +++ b/textattack/commands/eval_model_command.py @@ -56,7 +56,7 @@ def test_model_on_dataset(self, args): while i < min(args.num_examples, len(dataset)): dataset_batch = dataset[i : min(args.num_examples, i + args.batch_size)] batch_inputs = [] - for (text_input, ground_truth_output) in dataset_batch: + for text_input, ground_truth_output in dataset_batch: attacked_text = textattack.shared.AttackedText(text_input) batch_inputs.append(attacked_text.tokenizer_input) ground_truth_outputs.append(ground_truth_output) diff --git a/textattack/constraints/overlap/max_words_perturbed.py b/textattack/constraints/overlap/max_words_perturbed.py index b919978c9..8d09a4108 100644 --- a/textattack/constraints/overlap/max_words_perturbed.py +++ b/textattack/constraints/overlap/max_words_perturbed.py @@ -38,7 +38,6 @@ def __init__( self.max_percent = max_percent def _check_constraint(self, transformed_text, reference_text): - num_words_diff = len(transformed_text.all_words_diff(reference_text)) if self.max_percent: min_num_words = min(len(transformed_text.words), len(reference_text.words)) diff --git a/textattack/constraints/semantics/sentence_encoders/universal_sentence_encoder/multilingual_universal_sentence_encoder.py b/textattack/constraints/semantics/sentence_encoders/universal_sentence_encoder/multilingual_universal_sentence_encoder.py index 06dd84781..0249588ec 100644 --- 
a/textattack/constraints/semantics/sentence_encoders/universal_sentence_encoder/multilingual_universal_sentence_encoder.py +++ b/textattack/constraints/semantics/sentence_encoders/universal_sentence_encoder/multilingual_universal_sentence_encoder.py @@ -31,7 +31,7 @@ def __init__(self, threshold=0.8, large=False, metric="angular", **kwargs): try: self.model = hub.load(self._tfhub_url) except Exception as e: - print('Error loading model from tfhub, trying mirror url') + print("Error loading model from tfhub, trying mirror url") self.model = hub.load(self.mirror_tfhub_url) def encode(self, sentences): @@ -47,5 +47,5 @@ def __setstate__(self, state): try: self.model = hub.load(self._tfhub_url) except Exception as e: - print('Error loading model from tfhub, trying mirror url') + print("Error loading model from tfhub, trying mirror url") self.model = hub.load(self.mirror_tfhub_url) diff --git a/textattack/constraints/semantics/sentence_encoders/universal_sentence_encoder/universal_sentence_encoder.py b/textattack/constraints/semantics/sentence_encoders/universal_sentence_encoder/universal_sentence_encoder.py index 6b8327a7f..a1aae3377 100644 --- a/textattack/constraints/semantics/sentence_encoders/universal_sentence_encoder/universal_sentence_encoder.py +++ b/textattack/constraints/semantics/sentence_encoders/universal_sentence_encoder/universal_sentence_encoder.py @@ -35,7 +35,7 @@ def encode(self, sentences): try: self.model = hub.load(self._tfhub_url) except Exception as e: - print('Error loading model from tfhub, trying mirror url') + print("Error loading model from tfhub, trying mirror url") self.model = hub.load(self.mirror_tfhub_url) return self.model(sentences).numpy() @@ -49,5 +49,5 @@ def __setstate__(self, state): try: self.model = hub.load(self._tfhub_url) except Exception as e: - print('Error loading model from tfhub, trying mirror url') + print("Error loading model from tfhub, trying mirror url") self.model = hub.load(self.mirror_tfhub_url) diff --git a/textattack/goal_function_results/classification_goal_function_result.py b/textattack/goal_function_results/classification_goal_function_result.py index 3a70ded8e..1b9aaf532 100644 --- a/textattack/goal_function_results/classification_goal_function_result.py +++ b/textattack/goal_function_results/classification_goal_function_result.py @@ -26,7 +26,6 @@ def __init__( num_queries, ground_truth_output, ): - super().__init__( attacked_text, raw_output, diff --git a/textattack/goal_function_results/text_to_text_goal_function_result.py b/textattack/goal_function_results/text_to_text_goal_function_result.py index eae8d91e5..c50e2c11f 100644 --- a/textattack/goal_function_results/text_to_text_goal_function_result.py +++ b/textattack/goal_function_results/text_to_text_goal_function_result.py @@ -23,7 +23,6 @@ def __init__( num_queries, ground_truth_output, ): - super().__init__( attacked_text, raw_output, diff --git a/textattack/loggers/weights_and_biases_logger.py b/textattack/loggers/weights_and_biases_logger.py index 6a8303117..7b9990421 100644 --- a/textattack/loggers/weights_and_biases_logger.py +++ b/textattack/loggers/weights_and_biases_logger.py @@ -13,7 +13,6 @@ class WeightsAndBiasesLogger(Logger): """Logs attack results to Weights & Biases.""" def __init__(self, **kwargs): - global wandb wandb = LazyLoader("wandb", globals(), "wandb") diff --git a/textattack/metrics/quality_metrics/perplexity.py b/textattack/metrics/quality_metrics/perplexity.py index e22175219..f1572591f 100644 --- 
a/textattack/metrics/quality_metrics/perplexity.py +++ b/textattack/metrics/quality_metrics/perplexity.py @@ -94,7 +94,6 @@ def calculate(self, results): return self.all_metrics def calc_ppl(self, texts): - with torch.no_grad(): text = " ".join(texts) eval_loss = [] diff --git a/textattack/search_methods/greedy_word_swap_wir.py b/textattack/search_methods/greedy_word_swap_wir.py index ac17fbf30..5721ce6b6 100644 --- a/textattack/search_methods/greedy_word_swap_wir.py +++ b/textattack/search_methods/greedy_word_swap_wir.py @@ -65,7 +65,6 @@ def _get_index_order(self, initial_text): # compute the largest change in score we can find by swapping each word delta_ps = [] for idx in indices_to_order: - # Exit Loop when search_over is True - but we need to make sure delta_ps # is the same size as softmax_saliency_scores if search_over: diff --git a/textattack/shared/attacked_text.py b/textattack/shared/attacked_text.py index 11d27bfb2..4616b467e 100644 --- a/textattack/shared/attacked_text.py +++ b/textattack/shared/attacked_text.py @@ -259,6 +259,7 @@ def ith_word_diff(self, other_attacked_text: AttackedText, i: int) -> bool: def words_diff_num(self, other_attacked_text: AttackedText) -> int: """The number of words different between two AttackedText objects.""" + # using edit distance to calculate words diff num def generate_tokens(words): result = {} diff --git a/textattack/shared/validators.py b/textattack/shared/validators.py index 4d9611d5a..fcf08e150 100644 --- a/textattack/shared/validators.py +++ b/textattack/shared/validators.py @@ -24,7 +24,10 @@ r"^textattack.models.helpers.word_cnn_for_classification.*", r"^transformers.modeling_\w*\.\w*ForSequenceClassification$", ], - (NonOverlappingOutput, MinimizeBleu,): [ + ( + NonOverlappingOutput, + MinimizeBleu, + ): [ r"^textattack.models.helpers.t5_for_text_to_text.*", ], } diff --git a/textattack/trainer.py b/textattack/trainer.py index 9c3198ae3..26d72d315 100644 --- a/textattack/trainer.py +++ b/textattack/trainer.py @@ -407,7 +407,6 @@ def collate_fn(data): is_adv_sample = [] for item in data: if "_example_type" in item[0].keys(): - # Get example type value from OrderedDict and remove it adv = item[0].pop("_example_type") diff --git a/textattack/training_args.py b/textattack/training_args.py index 6c5aa034d..c6e02c171 100644 --- a/textattack/training_args.py +++ b/textattack/training_args.py @@ -547,7 +547,6 @@ def _create_dataset_from_args(cls, args): train_dataset.output_column == "label" and eval_dataset.output_column == "label" ): - train_dataset_labels = train_dataset._dataset["label"] eval_dataset_labels = eval_dataset._dataset["label"] diff --git a/textattack/transformations/word_swaps/word_swap_change_name.py b/textattack/transformations/word_swaps/word_swap_change_name.py index d54b755a5..c4feeff48 100644 --- a/textattack/transformations/word_swaps/word_swap_change_name.py +++ b/textattack/transformations/word_swaps/word_swap_change_name.py @@ -64,7 +64,6 @@ def _get_transformations(self, current_text, indices_to_modify): return transformed_texts def _get_replacement_words(self, word, word_part_of_speech): - replacement_words = [] tag = word_part_of_speech if ( diff --git a/textattack/transformations/word_swaps/word_swap_change_number.py b/textattack/transformations/word_swaps/word_swap_change_number.py index 1ced0f84d..b885b6fa4 100644 --- a/textattack/transformations/word_swaps/word_swap_change_number.py +++ b/textattack/transformations/word_swaps/word_swap_change_number.py @@ -70,7 +70,7 @@ def _get_transformations(self, 
current_text, indices_to_modify): # replace original numbers with new numbers transformed_texts = [] - for (idx, word) in num_words: + for idx, word in num_words: replacement_words = self._get_new_number(word) for r in replacement_words: if r == word: From 1d4317c4d8782b1ed7a9331062a9d9260d3ac7c4 Mon Sep 17 00:00:00 2001 From: HENG Date: Thu, 31 Aug 2023 00:53:02 +0100 Subject: [PATCH 08/12] reformat --- .../multilingual_universal_sentence_encoder.py | 4 ++-- .../universal_sentence_encoder/universal_sentence_encoder.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/textattack/constraints/semantics/sentence_encoders/universal_sentence_encoder/multilingual_universal_sentence_encoder.py b/textattack/constraints/semantics/sentence_encoders/universal_sentence_encoder/multilingual_universal_sentence_encoder.py index 0249588ec..4997b2ae4 100644 --- a/textattack/constraints/semantics/sentence_encoders/universal_sentence_encoder/multilingual_universal_sentence_encoder.py +++ b/textattack/constraints/semantics/sentence_encoders/universal_sentence_encoder/multilingual_universal_sentence_encoder.py @@ -31,7 +31,7 @@ def __init__(self, threshold=0.8, large=False, metric="angular", **kwargs): try: self.model = hub.load(self._tfhub_url) except Exception as e: - print("Error loading model from tfhub, trying mirror url") + print("Error loading model from tfhub, trying mirror url, Exception: ", e) self.model = hub.load(self.mirror_tfhub_url) def encode(self, sentences): @@ -47,5 +47,5 @@ def __setstate__(self, state): try: self.model = hub.load(self._tfhub_url) except Exception as e: - print("Error loading model from tfhub, trying mirror url") + print("Error loading model from tfhub, trying mirror url, Exception: ", e) self.model = hub.load(self.mirror_tfhub_url) diff --git a/textattack/constraints/semantics/sentence_encoders/universal_sentence_encoder/universal_sentence_encoder.py b/textattack/constraints/semantics/sentence_encoders/universal_sentence_encoder/universal_sentence_encoder.py index a1aae3377..13d988171 100644 --- a/textattack/constraints/semantics/sentence_encoders/universal_sentence_encoder/universal_sentence_encoder.py +++ b/textattack/constraints/semantics/sentence_encoders/universal_sentence_encoder/universal_sentence_encoder.py @@ -35,7 +35,7 @@ def encode(self, sentences): try: self.model = hub.load(self._tfhub_url) except Exception as e: - print("Error loading model from tfhub, trying mirror url") + print("Error loading model from tfhub, trying mirror url. Exception: ", e) self.model = hub.load(self.mirror_tfhub_url) return self.model(sentences).numpy() @@ -49,5 +49,5 @@ def __setstate__(self, state): try: self.model = hub.load(self._tfhub_url) except Exception as e: - print("Error loading model from tfhub, trying mirror url") + print("Error loading model from tfhub, trying mirror url. 
Exception: ", e) self.model = hub.load(self.mirror_tfhub_url) From 9eeef7950d07b7470e92c9e918a7097086eee062 Mon Sep 17 00:00:00 2001 From: HENG Date: Thu, 31 Aug 2023 01:00:06 +0100 Subject: [PATCH 09/12] reformat --- requirements.txt | 1 + .../universal_sentence_encoder/universal_sentence_encoder.py | 4 +++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 80f582ec5..995f1551b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -23,4 +23,5 @@ jieba OpenHowNet pycld2 click<8.1.0 +pinyin pyabsa>=2.0.6 diff --git a/textattack/constraints/semantics/sentence_encoders/universal_sentence_encoder/universal_sentence_encoder.py b/textattack/constraints/semantics/sentence_encoders/universal_sentence_encoder/universal_sentence_encoder.py index 13d988171..94a0d07b4 100644 --- a/textattack/constraints/semantics/sentence_encoders/universal_sentence_encoder/universal_sentence_encoder.py +++ b/textattack/constraints/semantics/sentence_encoders/universal_sentence_encoder/universal_sentence_encoder.py @@ -35,7 +35,9 @@ def encode(self, sentences): try: self.model = hub.load(self._tfhub_url) except Exception as e: - print("Error loading model from tfhub, trying mirror url. Exception: ", e) + print( + "Error loading model from tfhub, trying mirror url. Exception: ", e + ) self.model = hub.load(self.mirror_tfhub_url) return self.model(sentences).numpy() From 77be715cd4f852cb6dad652f2898244377956b70 Mon Sep 17 00:00:00 2001 From: HENG Date: Thu, 31 Aug 2023 01:23:02 +0100 Subject: [PATCH 10/12] clean --- examples/reactive_defense/readme.md | 58 +++++++++++++++++++ .../reactive_defense/reactive_defender.py | 2 +- textattack/search_methods/search_method.py | 2 +- 3 files changed, 60 insertions(+), 2 deletions(-) create mode 100644 examples/reactive_defense/readme.md diff --git a/examples/reactive_defense/readme.md b/examples/reactive_defense/readme.md new file mode 100644 index 000000000..abad6d487 --- /dev/null +++ b/examples/reactive_defense/readme.md @@ -0,0 +1,58 @@ +# Reactive Adversarial Defense + +This folder shows the example of using the reactive adversarial defense method to defend against adversarial attacks. + +## Introduction +Recent studies have shown that large pre-trained language models are vulnerable to adversarial attacks. Existing methods attempt to reconstruct the adversarial examples. However, these methods usually have limited performance in defense against adversarial examples, while also negatively impacting the performance on natural examples. To overcome this problem, we propose a method called Reactive Perturbation Defocusing (RPD). RPD uses an adversarial detector to identify adversarial examples and reduce false defenses on natural examples. Instead of reconstructing the adversaries, RPD injects safe perturbations into adversarial examples to distract the objective models from the malicious perturbations. Our experiments on three datasets, two objective models, and various adversarial attacks show that our proposed framework successfully repairs up to approximately 97% of correctly identified adversarial examples with only about a 2% performance decrease on natural examples. We also provide a demo of adversarial detection and repair based on our work. + +## Requirements +- Python 3.9 +- PyTorch +- transformers +- pyabsa >= 2.0.6 + +## Usage +### 1. Install pyabsa and textattack +```bash +pip install transformers +pip install pyabsa +pip install textattack +``` + +### 2. 
Run the adversarial attack against the Rapid defense +This will attack the `tadbert-sst2` model with the `textfooler` recipe on the `sst2` dataset, while the Rapid defense protects the model. +The available models and datasets can be found in [TextAttack](https://github.com/yangheng95/TextAttack/blob/9eeef7950d07b7470e92c9e918a7097086eee062/textattack/model_args.py#L96). +You can evaluate the adversarial attack performance under the Rapid defense by running the following command: +```bash +textattack attack --recipe textfooler --model tadbert-sst2 --num-examples 100 --dataset sst2 --attack-n 1 +``` + +### 3. Script examples +Please find the script examples in [examples](https://github.com/yangheng95/TextAttack/blob/master/examples/reactive_defense/sst2_reactive_defense.py). + +### 4. Play with the Demo +You can play with the demo on Hugging Face Spaces. The [demo](https://huggingface.co/spaces/anonymous8/Rapid-Textual-Adversarial-Defense) is based on [PyABSA](https://github.com/yangheng95/PyABSA) and [TextAttack](https://github.com/QData/TextAttack). + + +## Citation +If you find this repo helpful, please cite the following paper: +``` +@article{DBLP:journals/corr/abs-2305-04067, + author = {Heng Yang and + Ke Li}, + title = {Reactive Perturbation Defocusing for Textual Adversarial Defense}, + journal = {CoRR}, + volume = {abs/2305.04067}, + year = {2023}, + url = {https://doi.org/10.48550/arXiv.2305.04067}, + doi = {10.48550/arXiv.2305.04067}, + eprinttype = {arXiv}, + eprint = {2305.04067}, + timestamp = {Thu, 11 May 2023 15:54:24 +0200}, + biburl = {https://dblp.org/rec/journals/corr/abs-2305-04067.bib}, + bibsource = {dblp computer science bibliography, https://dblp.org} +} +``` + +## Author +This work is presented by [Heng Yang](https://github.com/yangheng95). If you have any questions, please contact hy345@exeter.ac.uk \ No newline at end of file diff --git a/textattack/reactive_defense/reactive_defender.py b/textattack/reactive_defense/reactive_defender.py index 3aca4f20a..80f43661b 100644 --- a/textattack/reactive_defense/reactive_defender.py +++ b/textattack/reactive_defense/reactive_defender.py @@ -6,7 +6,7 @@ # huggingface: https://huggingface.co/yangheng # google scholar: https://scholar.google.com/citations?user=NPq5a_0AAAAJ&hl=en # Copyright (C) 2021. All Rights Reserved.
-from abc import ABC, abstractmethod +from abc import ABC from textattack.shared.utils import ReprMixin diff --git a/textattack/search_methods/search_method.py b/textattack/search_methods/search_method.py index b11060891..0950330e4 100644 --- a/textattack/search_methods/search_method.py +++ b/textattack/search_methods/search_method.py @@ -4,7 +4,7 @@ """ -from abc import ABC, abstractmethod +from abc import ABC from textattack.shared import AttackedText from textattack.shared.utils import ReprMixin From 22ab1e5d84e12d53d553868c3933824b52548c70 Mon Sep 17 00:00:00 2001 From: HENG Date: Thu, 31 Aug 2023 01:32:46 +0100 Subject: [PATCH 11/12] clean --- textattack/search_methods/search_method.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/textattack/search_methods/search_method.py b/textattack/search_methods/search_method.py index 0950330e4..a98abc2c0 100644 --- a/textattack/search_methods/search_method.py +++ b/textattack/search_methods/search_method.py @@ -4,9 +4,9 @@ """ -from abc import ABC +from abc import ABC, abstractmethod -from textattack.shared import AttackedText +# from textattack.shared import AttackedText from textattack.shared.utils import ReprMixin From 669dfcd7e369cd761b78437b0af9bfea08742dec Mon Sep 17 00:00:00 2001 From: Yanjun Qi Date: Mon, 11 Sep 2023 16:42:48 -0400 Subject: [PATCH 12/12] correct format issues --- .../1_Introduction_and_Transformations.ipynb | 76 +- docs/2notebook/2_Constraints.ipynb | 86 +- docs/2notebook/3_Augmentations.ipynb | 732 +-- .../4_Custom_Datasets_Word_Embedding.ipynb | 62 +- docs/2notebook/Example_0_tensorflow.ipynb | 45 +- docs/2notebook/Example_1_sklearn.ipynb | 110 +- docs/2notebook/Example_2_allennlp.ipynb | 16 +- docs/2notebook/Example_3_Keras.ipynb | 74 +- docs/2notebook/Example_4_CamemBERT.ipynb | 43 +- docs/2notebook/Example_5_Explain_BERT.ipynb | 69 +- docs/2notebook/Example_6_Chinese_Attack.ipynb | 5263 ++++++++++------- .../models/wrappers/pyabsa_model_wrapper.py | 2 +- .../reactive_defense/tad_reactive_defender.py | 2 +- 13 files changed, 3766 insertions(+), 2814 deletions(-) diff --git a/docs/2notebook/1_Introduction_and_Transformations.ipynb b/docs/2notebook/1_Introduction_and_Transformations.ipynb index 781aa923d..6a4db7ec3 100644 --- a/docs/2notebook/1_Introduction_and_Transformations.ipynb +++ b/docs/2notebook/1_Introduction_and_Transformations.ipynb @@ -77,19 +77,19 @@ "source": [ "from textattack.transformations import WordSwap\n", "\n", + "\n", "class BananaWordSwap(WordSwap):\n", - " \"\"\" Transforms an input by replacing any word with 'banana'.\n", - " \"\"\"\n", - " \n", + " \"\"\"Transforms an input by replacing any word with 'banana'.\"\"\"\n", + "\n", " # We don't need a constructor, since our class doesn't require any parameters.\n", "\n", " def _get_replacement_words(self, word):\n", - " \"\"\" Returns 'banana', no matter what 'word' was originally.\n", - " \n", - " Returns a list with one item, since `_get_replacement_words` is intended to\n", - " return a list of candidate replacement words.\n", + " \"\"\"Returns 'banana', no matter what 'word' was originally.\n", + "\n", + " Returns a list with one item, since `_get_replacement_words` is intended to\n", + " return a list of candidate replacement words.\n", " \"\"\"\n", - " return ['banana']" + " return [\"banana\"]" ] }, { @@ -133,17 +133,23 @@ "import transformers\n", "from textattack.models.wrappers import HuggingFaceModelWrapper\n", "\n", - "model = 
transformers.AutoModelForSequenceClassification.from_pretrained(\"textattack/bert-base-uncased-ag-news\")\n", - "tokenizer = transformers.AutoTokenizer.from_pretrained(\"textattack/bert-base-uncased-ag-news\")\n", + "model = transformers.AutoModelForSequenceClassification.from_pretrained(\n", + " \"textattack/bert-base-uncased-ag-news\"\n", + ")\n", + "tokenizer = transformers.AutoTokenizer.from_pretrained(\n", + " \"textattack/bert-base-uncased-ag-news\"\n", + ")\n", "\n", "model_wrapper = HuggingFaceModelWrapper(model, tokenizer)\n", "\n", "# Create the goal function using the model\n", "from textattack.goal_functions import UntargetedClassification\n", + "\n", "goal_function = UntargetedClassification(model_wrapper)\n", "\n", "# Import the dataset\n", "from textattack.datasets import HuggingFaceDataset\n", + "\n", "dataset = HuggingFaceDataset(\"ag_news\", None, \"test\")" ] }, @@ -166,14 +172,16 @@ "outputs": [], "source": [ "from textattack.search_methods import GreedySearch\n", - "from textattack.constraints.pre_transformation import RepeatModification, StopwordModification\n", + "from textattack.constraints.pre_transformation import (\n", + " RepeatModification,\n", + " StopwordModification,\n", + ")\n", "from textattack import Attack\n", "\n", "# We're going to use our Banana word swap class as the attack transformation.\n", - "transformation = BananaWordSwap() \n", + "transformation = BananaWordSwap()\n", "# We'll constrain modification of already modified indices and stopwords\n", - "constraints = [RepeatModification(),\n", - " StopwordModification()]\n", + "constraints = [RepeatModification(), StopwordModification()]\n", "# We'll use the Greedy search method\n", "search_method = GreedySearch()\n", "# Now, let's make the attack from the 4 components:\n", @@ -517,8 +525,8 @@ } ], "source": [ - "from tqdm import tqdm # tqdm provides us a nice progress bar.\n", - "from textattack.loggers import CSVLogger # tracks a dataframe for us.\n", + "from tqdm import tqdm # tqdm provides us a nice progress bar.\n", + "from textattack.loggers import CSVLogger # tracks a dataframe for us.\n", "from textattack.attack_results import SuccessfulAttackResult\n", "from textattack import Attacker\n", "from textattack import AttackArgs\n", @@ -530,14 +538,14 @@ "\n", "attack_results = attacker.attack_dataset()\n", "\n", - "#The following legacy tutorial code shows how the Attack API works in detail.\n", + "# The following legacy tutorial code shows how the Attack API works in detail.\n", "\n", - "#logger = CSVLogger(color_method='html')\n", + "# logger = CSVLogger(color_method='html')\n", "\n", - "#num_successes = 0\n", - "#i = 0\n", - "#while num_successes < 10:\n", - " #result = next(results_iterable)\n", + "# num_successes = 0\n", + "# i = 0\n", + "# while num_successes < 10:\n", + "# result = next(results_iterable)\n", "# example, ground_truth_output = dataset[i]\n", "# i += 1\n", "# result = attack.attack(example, ground_truth_output)\n", @@ -652,15 +660,19 @@ ], "source": [ "import pandas as pd\n", - "pd.options.display.max_colwidth = 480 # increase colum width so we can actually read the examples\n", "\n", - "logger = CSVLogger(color_method='html')\n", + "pd.options.display.max_colwidth = (\n", + " 480 # increase colum width so we can actually read the examples\n", + ")\n", + "\n", + "logger = CSVLogger(color_method=\"html\")\n", "\n", "for result in attack_results:\n", " logger.log_attack_result(result)\n", "\n", "from IPython.core.display import display, HTML\n", - 
"display(HTML(logger.df[['original_text', 'perturbed_text']].to_html(escape=False)))" + "\n", + "display(HTML(logger.df[[\"original_text\", \"perturbed_text\"]].to_html(escape=False)))" ] }, { @@ -867,10 +879,10 @@ "# For AG News, labels are 0: World, 1: Sports, 2: Business, 3: Sci/Tech\n", "\n", "custom_dataset = [\n", - " ('Malaria deaths in Africa fall by 5% from last year', 0),\n", - " ('Washington Nationals defeat the Houston Astros to win the World Series', 1),\n", - " ('Exxon Mobil hires a new CEO', 2),\n", - " ('Microsoft invests $1 billion in OpenAI', 3),\n", + " (\"Malaria deaths in Africa fall by 5% from last year\", 0),\n", + " (\"Washington Nationals defeat the Houston Astros to win the World Series\", 1),\n", + " (\"Exxon Mobil hires a new CEO\", 2),\n", + " (\"Microsoft invests $1 billion in OpenAI\", 3),\n", "]\n", "\n", "attack_args = AttackArgs(num_examples=4)\n", @@ -881,14 +893,14 @@ "\n", "results_iterable = attacker.attack_dataset()\n", "\n", - "logger = CSVLogger(color_method='html')\n", + "logger = CSVLogger(color_method=\"html\")\n", "\n", "for result in results_iterable:\n", " logger.log_attack_result(result)\n", "\n", "from IPython.core.display import display, HTML\n", - " \n", - "display(HTML(logger.df[['original_text', 'perturbed_text']].to_html(escape=False)))" + "\n", + "display(HTML(logger.df[[\"original_text\", \"perturbed_text\"]].to_html(escape=False)))" ] } ], diff --git a/docs/2notebook/2_Constraints.ipynb b/docs/2notebook/2_Constraints.ipynb index 3f384995b..b219ca2c3 100644 --- a/docs/2notebook/2_Constraints.ipynb +++ b/docs/2notebook/2_Constraints.ipynb @@ -100,6 +100,7 @@ ], "source": [ "import tensorflow as tf\n", + "\n", "print(tf.__version__)" ] }, @@ -149,10 +150,11 @@ "!pip3 install .\n", "\n", "import nltk\n", - "nltk.download('punkt') # The NLTK tokenizer\n", - "nltk.download('maxent_ne_chunker') # NLTK named-entity chunker\n", - "nltk.download('words') # NLTK list of words\n", - "nltk.download('averaged_perceptron_tagger')" + "\n", + "nltk.download(\"punkt\") # The NLTK tokenizer\n", + "nltk.download(\"maxent_ne_chunker\") # NLTK named-entity chunker\n", + "nltk.download(\"words\") # NLTK list of words\n", + "nltk.download(\"averaged_perceptron_tagger\")" ] }, { @@ -205,8 +207,10 @@ } ], "source": [ - "sentence = ('In 2017, star quarterback Tom Brady led the Patriots to the Super Bowl, '\n", - " 'but lost to the Philadelphia Eagles.')\n", + "sentence = (\n", + " \"In 2017, star quarterback Tom Brady led the Patriots to the Super Bowl, \"\n", + " \"but lost to the Philadelphia Eagles.\"\n", + ")\n", "\n", "# 1. 
Tokenize using the NLTK tokenizer.\n", "tokens = nltk.word_tokenize(sentence)\n", @@ -285,6 +289,7 @@ "source": [ "import functools\n", "\n", + "\n", "@functools.lru_cache(maxsize=2**14)\n", "def get_entities(sentence):\n", " tokens = nltk.word_tokenize(sentence)\n", @@ -379,9 +384,10 @@ "source": [ "from textattack.constraints import Constraint\n", "\n", + "\n", "class NamedEntityConstraint(Constraint):\n", - " \"\"\" A constraint that ensures `transformed_text` only substitutes named entities from `current_text` with other named entities.\n", - " \"\"\"\n", + " \"\"\"A constraint that ensures `transformed_text` only substitutes named entities from `current_text` with other named entities.\"\"\"\n", + "\n", " def _check_constraint(self, transformed_text, current_text):\n", " transformed_entities = get_entities(transformed_text.text)\n", " current_entities = get_entities(current_text.text)\n", @@ -390,26 +396,27 @@ " if len(current_entities) == 0:\n", " return False\n", " if len(current_entities) != len(transformed_entities):\n", - " # If the two sentences have a different number of entities, then \n", - " # they definitely don't have the same labels. In this case, the \n", + " # If the two sentences have a different number of entities, then\n", + " # they definitely don't have the same labels. In this case, the\n", " # constraint is violated, and we return False.\n", " return False\n", " else:\n", " # Here we compare all of the words, in order, to make sure that they match.\n", - " # If we find two words that don't match, this means a word was swapped \n", + " # If we find two words that don't match, this means a word was swapped\n", " # between `current_text` and `transformed_text`. That word must be a named entity to fulfill our\n", " # constraint.\n", " current_word_label = None\n", " transformed_word_label = None\n", - " for (word_1, label_1), (word_2, label_2) in zip(current_entities, transformed_entities):\n", + " for (word_1, label_1), (word_2, label_2) in zip(\n", + " current_entities, transformed_entities\n", + " ):\n", " if word_1 != word_2:\n", - " # Finally, make sure that words swapped between `x` and `x_adv` are named entities. If \n", + " # Finally, make sure that words swapped between `x` and `x_adv` are named entities. If\n", " # they're not, then we also return False.\n", - " if (label_1 not in ['NNP', 'NE']) or (label_2 not in ['NNP', 'NE']):\n", - " return False \n", + " if (label_1 not in [\"NNP\", \"NE\"]) or (label_2 not in [\"NNP\", \"NE\"]):\n", + " return False\n", " # If we get here, all of the labels match up. 
Return True!\n", - " return True\n", - " " + " return True" ] }, { @@ -638,17 +645,23 @@ "import transformers\n", "from textattack.models.wrappers import HuggingFaceModelWrapper\n", "\n", - "model = transformers.AutoModelForSequenceClassification.from_pretrained(\"textattack/albert-base-v2-ag-news\")\n", - "tokenizer = transformers.AutoTokenizer.from_pretrained(\"textattack/albert-base-v2-ag-news\")\n", + "model = transformers.AutoModelForSequenceClassification.from_pretrained(\n", + " \"textattack/albert-base-v2-ag-news\"\n", + ")\n", + "tokenizer = transformers.AutoTokenizer.from_pretrained(\n", + " \"textattack/albert-base-v2-ag-news\"\n", + ")\n", "\n", "model_wrapper = HuggingFaceModelWrapper(model, tokenizer)\n", "\n", "# Create the goal function using the model\n", "from textattack.goal_functions import UntargetedClassification\n", + "\n", "goal_function = UntargetedClassification(model_wrapper)\n", "\n", "# Import the dataset\n", "from textattack.datasets import HuggingFaceDataset\n", + "\n", "dataset = HuggingFaceDataset(\"ag_news\", None, \"test\")" ] }, @@ -663,23 +676,27 @@ "from textattack.transformations import WordSwapEmbedding\n", "from textattack.search_methods import GreedyWordSwapWIR\n", "from textattack import Attack\n", - "from textattack.constraints.pre_transformation import RepeatModification, StopwordModification\n", + "from textattack.constraints.pre_transformation import (\n", + " RepeatModification,\n", + " StopwordModification,\n", + ")\n", "\n", "# We're going to the `WordSwapEmbedding` transformation. Using the default settings, this\n", - "# will try substituting words with their neighbors in the counter-fitted embedding space. \n", - "transformation = WordSwapEmbedding(max_candidates=20) \n", + "# will try substituting words with their neighbors in the counter-fitted embedding space.\n", + "transformation = WordSwapEmbedding(max_candidates=20)\n", "\n", "# We'll use the greedy search with word importance ranking method again\n", "search_method = GreedyWordSwapWIR()\n", "\n", "# Our constraints will be the same as Tutorial 1, plus the named entity constraint\n", - "constraints = [RepeatModification(),\n", - " StopwordModification(),\n", - " NamedEntityConstraint(False)]\n", + "constraints = [\n", + " RepeatModification(),\n", + " StopwordModification(),\n", + " NamedEntityConstraint(False),\n", + "]\n", "\n", - "# Now, let's make the attack using these parameters. 
\n", - "attack = Attack(goal_function, constraints, transformation, search_method)\n", - "\n" + "# Now, let's make the attack using these parameters.\n", + "attack = Attack(goal_function, constraints, transformation, search_method)" ] }, { @@ -800,11 +817,13 @@ } ], "source": [ - "from textattack.loggers import CSVLogger # tracks a dataframe for us.\n", + "from textattack.loggers import CSVLogger # tracks a dataframe for us.\n", "from textattack.attack_results import SuccessfulAttackResult\n", "from textattack import Attacker, AttackArgs\n", "\n", - "attack_args = AttackArgs(num_successful_examples=5, log_to_csv=\"results.csv\", csv_coloring_style=\"html\")\n", + "attack_args = AttackArgs(\n", + " num_successful_examples=5, log_to_csv=\"results.csv\", csv_coloring_style=\"html\"\n", + ")\n", "attacker = Attacker(attack, dataset, attack_args)\n", "\n", "attacker.attack_dataset()" @@ -833,13 +852,16 @@ "outputs": [], "source": [ "import pandas as pd\n", - "pd.options.display.max_colwidth = 480 # increase column width so we can actually read the examples\n", + "\n", + "pd.options.display.max_colwidth = (\n", + " 480 # increase column width so we can actually read the examples\n", + ")\n", "\n", "from IPython.core.display import display, HTML\n", "\n", "logger = attacker.attack_log_manager.loggers[0]\n", "successes = logger.df[logger.df[\"result_type\"] == \"Successful\"]\n", - "display(HTML(successes[['original_text', 'perturbed_text']].to_html(escape=False)))" + "display(HTML(successes[[\"original_text\", \"perturbed_text\"]].to_html(escape=False)))" ] }, { diff --git a/docs/2notebook/3_Augmentations.ipynb b/docs/2notebook/3_Augmentations.ipynb index 4f72058df..f136fe609 100644 --- a/docs/2notebook/3_Augmentations.ipynb +++ b/docs/2notebook/3_Augmentations.ipynb @@ -1,378 +1,392 @@ { - "nbformat": 4, - "nbformat_minor": 0, - "metadata": { - "colab": { - "name": "Augmentation with TextAttack.ipynb", - "provenance": [] - }, - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.8" - } + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "name": "Augmentation with TextAttack.ipynb", + "provenance": [] }, - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "id": "m83IiqVREJ96" - }, - "source": [ - "# TextAttack Augmentation" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "6UZ0d84hEJ98" - }, - "source": [ - "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/QData/TextAttack/blob/master/docs/2notebook/3_Augmentations.ipynb)\n", - "\n", - "[![View Source on GitHub](https://img.shields.io/badge/github-view%20source-black.svg)](https://github.com/QData/TextAttack/blob/master/docs/2notebook/3_Augmentations.ipynb)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "tjqc2c5_7YaX" - }, - "source": [ - " Please remember to run the following in your notebook enviroment before running the tutorial codes:\n", - "\n", - "```\n", - "pip3 install textattack[tensorflow]\n", - "```\n", - "\n", - "\n", - "\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "qZ5xnoevEJ99" - }, - "source": [ - "Augmenting a dataset using TextAttack requries only a few lines of code when it is done right. 
The `Augmenter` class is created for this purpose to generate augmentations of a string or a list of strings. Augmentation could be done in either python script or command line.\n", - "\n", - "### Creating an Augmenter\n", - "\n", - "The **Augmenter** class is essensial for performing data augmentation using TextAttack. It takes in four paramerters in the following order:\n", - "\n", - "\n", - "1. **transformation**: all [transformations](https://textattack.readthedocs.io/en/latest/apidoc/textattack.transformations.html) implemented by TextAttack can be used to create an `Augmenter`. Note here that if we want to apply multiple transformations in the same time, they first need to be incooporated into a `CompositeTransformation` class.\n", - "2. **constraints**: [constraints](https://textattack.readthedocs.io/en/latest/apidoc/textattack.constraints.html#) determine whether or not a given augmentation is valid, consequently enhancing the quality of the augmentations. The default augmenter does not have any constraints but contraints can be supplied as a list to the Augmenter.\n", - "3. **pct_words_to_swap**: percentage of words to swap per augmented example. The default is set to 0.1 (10%).\n", - "4. **transformations_per_example** maximum number of augmentations per input. The default is set to 1 (one augmented sentence given one original input)\n", - "\n", - "An example of creating one's own augmenter is shown below. In this case, we are creating an augmenter with **RandomCharacterDeletion** and **WordSwapQWERTY** transformations, **RepeatModification** and **StopWordModification** constraints. A maximum of **50%** of the words could be purturbed, and 10 augmentations will be generated from each input sentence.\n" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "5AXyxiLD4X93" - }, - "source": [ - "# import transformations, contraints, and the Augmenter\n", - "from textattack.transformations import WordSwapRandomCharacterDeletion\n", - "from textattack.transformations import WordSwapQWERTY\n", - "from textattack.transformations import CompositeTransformation\n", - "\n", - "from textattack.constraints.pre_transformation import RepeatModification\n", - "from textattack.constraints.pre_transformation import StopwordModification\n", - "\n", - "from textattack.augmentation import Augmenter" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "wFeXF_OL-vyw", - "outputId": "c041e77e-accd-4a58-88be-9b140dd0cd56" - }, - "source": [ - "# Set up transformation using CompositeTransformation()\n", - "transformation = CompositeTransformation([WordSwapRandomCharacterDeletion(), WordSwapQWERTY()])\n", - "# Set up constraints\n", - "constraints = [RepeatModification(), StopwordModification()]\n", - "# Create augmenter with specified parameters\n", - "augmenter = Augmenter(transformation=transformation, constraints=constraints, pct_words_to_swap=0.5, transformations_per_example=10)\n", - "s = 'What I cannot create, I do not understand.'\n", - "# Augment!\n", - "augmenter.augment(s)" - ], - "execution_count": null, - "outputs": [ - { - "data": { - "text/plain": [ - "['Ahat I camnot reate, I do not unerstand.',\n", - " 'Ahat I cwnnot crewte, I do not undefstand.',\n", - " 'Wat I camnot vreate, I do not undefstand.',\n", - " 'Wha I annot crate, I do not unerstand.',\n", - " 'Whaf I canno creatr, I do not ynderstand.',\n", - " 'Wtat I cannor dreate, I do not understwnd.',\n", - " 'Wuat I canno 
ceate, I do not unferstand.',\n", - " 'hat I cnnot ceate, I do not undersand.',\n", - " 'hat I cnnot cfeate, I do not undfrstand.',\n", - " 'hat I cwnnot crfate, I do not ujderstand.']" - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "b7020KtvEJ9-" - }, - "source": [ - "### Pre-built Augmentation Recipes\n", - "\n", - "In addition to creating our own augmenter, we could also use pre-built augmentation recipes to perturb datasets. These recipes are implemented from publishded papers and are very convenient to use. The list of available recipes can be found [here](https://textattack.readthedocs.io/en/latest/3recipes/augmenter_recipes.html).\n" - ] + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.8" + } + }, + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "m83IiqVREJ96" + }, + "source": [ + "# TextAttack Augmentation" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "6UZ0d84hEJ98" + }, + "source": [ + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/QData/TextAttack/blob/master/docs/2notebook/3_Augmentations.ipynb)\n", + "\n", + "[![View Source on GitHub](https://img.shields.io/badge/github-view%20source-black.svg)](https://github.com/QData/TextAttack/blob/master/docs/2notebook/3_Augmentations.ipynb)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "tjqc2c5_7YaX" + }, + "source": [ + " Please remember to run the following in your notebook enviroment before running the tutorial codes:\n", + "\n", + "```\n", + "pip3 install textattack[tensorflow]\n", + "```\n", + "\n", + "\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "qZ5xnoevEJ99" + }, + "source": [ + "Augmenting a dataset using TextAttack requries only a few lines of code when it is done right. The `Augmenter` class is created for this purpose to generate augmentations of a string or a list of strings. Augmentation could be done in either python script or command line.\n", + "\n", + "### Creating an Augmenter\n", + "\n", + "The **Augmenter** class is essensial for performing data augmentation using TextAttack. It takes in four paramerters in the following order:\n", + "\n", + "\n", + "1. **transformation**: all [transformations](https://textattack.readthedocs.io/en/latest/apidoc/textattack.transformations.html) implemented by TextAttack can be used to create an `Augmenter`. Note here that if we want to apply multiple transformations in the same time, they first need to be incooporated into a `CompositeTransformation` class.\n", + "2. **constraints**: [constraints](https://textattack.readthedocs.io/en/latest/apidoc/textattack.constraints.html#) determine whether or not a given augmentation is valid, consequently enhancing the quality of the augmentations. The default augmenter does not have any constraints but contraints can be supplied as a list to the Augmenter.\n", + "3. **pct_words_to_swap**: percentage of words to swap per augmented example. The default is set to 0.1 (10%).\n", + "4. **transformations_per_example** maximum number of augmentations per input. 
The default is set to 1 (one augmented sentence given one original input)\n", + "\n", + "An example of creating one's own augmenter is shown below. In this case, we are creating an augmenter with **RandomCharacterDeletion** and **WordSwapQWERTY** transformations, **RepeatModification** and **StopWordModification** constraints. A maximum of **50%** of the words could be purturbed, and 10 augmentations will be generated from each input sentence.\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "5AXyxiLD4X93" + }, + "source": [ + "# import transformations, contraints, and the Augmenter\n", + "from textattack.transformations import WordSwapRandomCharacterDeletion\n", + "from textattack.transformations import WordSwapQWERTY\n", + "from textattack.transformations import CompositeTransformation\n", + "\n", + "from textattack.constraints.pre_transformation import RepeatModification\n", + "from textattack.constraints.pre_transformation import StopwordModification\n", + "\n", + "from textattack.augmentation import Augmenter" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "wFeXF_OL-vyw", + "outputId": "c041e77e-accd-4a58-88be-9b140dd0cd56" + }, + "source": [ + "# Set up transformation using CompositeTransformation()\n", + "transformation = CompositeTransformation(\n", + " [WordSwapRandomCharacterDeletion(), WordSwapQWERTY()]\n", + ")\n", + "# Set up constraints\n", + "constraints = [RepeatModification(), StopwordModification()]\n", + "# Create augmenter with specified parameters\n", + "augmenter = Augmenter(\n", + " transformation=transformation,\n", + " constraints=constraints,\n", + " pct_words_to_swap=0.5,\n", + " transformations_per_example=10,\n", + ")\n", + "s = \"What I cannot create, I do not understand.\"\n", + "# Augment!\n", + "augmenter.augment(s)" + ], + "execution_count": null, + "outputs": [ { - "cell_type": "markdown", - "metadata": { - "id": "pkBqK5wYQKZu" - }, - "source": [ - "In the following example, we will use the `CheckListAugmenter` to showcase our augmentation recipes. The `CheckListAugmenter` augments words by using the transformation methods provided by CheckList INV testing, which combines **Name Replacement**, **Location Replacement**, **Number Alteration**, and **Contraction/Extension**. The original paper can be found here: [\"Beyond Accuracy: Behavioral Testing of NLP models with CheckList\" (Ribeiro et al., 2020)](https://arxiv.org/abs/2005.04118)" + "data": { + "text/plain": [ + "['Ahat I camnot reate, I do not unerstand.',\n", + " 'Ahat I cwnnot crewte, I do not undefstand.',\n", + " 'Wat I camnot vreate, I do not undefstand.',\n", + " 'Wha I annot crate, I do not unerstand.',\n", + " 'Whaf I canno creatr, I do not ynderstand.',\n", + " 'Wtat I cannor dreate, I do not understwnd.',\n", + " 'Wuat I canno ceate, I do not unferstand.',\n", + " 'hat I cnnot ceate, I do not undersand.',\n", + " 'hat I cnnot cfeate, I do not undfrstand.',\n", + " 'hat I cwnnot crfate, I do not ujderstand.']" ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "b7020KtvEJ9-" + }, + "source": [ + "### Pre-built Augmentation Recipes\n", + "\n", + "In addition to creating our own augmenter, we could also use pre-built augmentation recipes to perturb datasets. These recipes are implemented from publishded papers and are very convenient to use. 
The list of available recipes can be found [here](https://textattack.readthedocs.io/en/latest/3recipes/augmenter_recipes.html).\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "pkBqK5wYQKZu" + }, + "source": [ + "In the following example, we will use the `CheckListAugmenter` to showcase our augmentation recipes. The `CheckListAugmenter` augments words by using the transformation methods provided by CheckList INV testing, which combines **Name Replacement**, **Location Replacement**, **Number Alteration**, and **Contraction/Extension**. The original paper can be found here: [\"Beyond Accuracy: Behavioral Testing of NLP models with CheckList\" (Ribeiro et al., 2020)](https://arxiv.org/abs/2005.04118)" + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "WkYiVH6lQedu", + "outputId": "cd5ffc65-ca80-45cd-b3bb-d023bcad09a4" + }, + "source": [ + "# import the CheckListAugmenter\n", + "from textattack.augmentation import CheckListAugmenter\n", + "\n", + "# Alter default values if desired\n", + "augmenter = CheckListAugmenter(pct_words_to_swap=0.2, transformations_per_example=5)\n", + "s = \"I'd love to go to Japan but the tickets are 500 dollars\"\n", + "# Augment\n", + "augmenter.augment(s)" + ], + "execution_count": null, + "outputs": [ { - "cell_type": "code", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "WkYiVH6lQedu", - "outputId": "cd5ffc65-ca80-45cd-b3bb-d023bcad09a4" - }, - "source": [ - "# import the CheckListAugmenter\n", - "from textattack.augmentation import CheckListAugmenter\n", - "# Alter default values if desired\n", - "augmenter = CheckListAugmenter(pct_words_to_swap=0.2, transformations_per_example=5)\n", - "s = \"I'd love to go to Japan but the tickets are 500 dollars\"\n", - "# Augment\n", - "augmenter.augment(s)" - ], - "execution_count": null, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "2021-06-09 16:58:41,816 --------------------------------------------------------------------------------\n", - "2021-06-09 16:58:41,817 The model key 'ner' now maps to 'https://huggingface.co/flair/ner-english' on the HuggingFace ModelHub\n", - "2021-06-09 16:58:41,817 - The most current version of the model is automatically downloaded from there.\n", - "2021-06-09 16:58:41,818 - (you can alternatively manually download the original model at https://nlp.informatik.hu-berlin.de/resources/models/ner/en-ner-conll03-v0.4.pt)\n", - "2021-06-09 16:58:41,818 --------------------------------------------------------------------------------\n", - "2021-06-09 16:58:41,906 loading file /u/lab/jy2ma/.flair/models/ner-english/4f4cdab26f24cb98b732b389e6cebc646c36f54cfd6e0b7d3b90b25656e4262f.8baa8ae8795f4df80b28e7f7b61d788ecbb057d1dc85aacb316f1bd02837a4a4\n" - ] - }, - { - "data": { - "text/plain": [ - "['I would love to go to Chile but the tickets are 500 dollars',\n", - " 'I would love to go to Japan but the tickets are 500 dollars',\n", - " 'I would love to go to Japan but the tickets are 75 dollars',\n", - " \"I'd love to go to Oman but the tickets are 373 dollars\",\n", - " \"I'd love to go to Vietnam but the tickets are 613 dollars\"]" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ] + "name": "stdout", + "output_type": "stream", + "text": [ + "2021-06-09 16:58:41,816 --------------------------------------------------------------------------------\n", + "2021-06-09 16:58:41,817 The model key 'ner' now maps 
to 'https://huggingface.co/flair/ner-english' on the HuggingFace ModelHub\n", + "2021-06-09 16:58:41,817 - The most current version of the model is automatically downloaded from there.\n", + "2021-06-09 16:58:41,818 - (you can alternatively manually download the original model at https://nlp.informatik.hu-berlin.de/resources/models/ner/en-ner-conll03-v0.4.pt)\n", + "2021-06-09 16:58:41,818 --------------------------------------------------------------------------------\n", + "2021-06-09 16:58:41,906 loading file /u/lab/jy2ma/.flair/models/ner-english/4f4cdab26f24cb98b732b389e6cebc646c36f54cfd6e0b7d3b90b25656e4262f.8baa8ae8795f4df80b28e7f7b61d788ecbb057d1dc85aacb316f1bd02837a4a4\n" + ] }, { - "cell_type": "markdown", - "metadata": { - "id": "5vn22xrLST0H" - }, - "source": [ - "Note that the previous snippet of code is equivalent of running\n", - "\n", - "```\n", - "textattack augment --recipe checklist --pct-words-to-swap .1 --transformations-per-example 5 --exclude-original --interactive\n", - "```\n", - "in command line.\n" + "data": { + "text/plain": [ + "['I would love to go to Chile but the tickets are 500 dollars',\n", + " 'I would love to go to Japan but the tickets are 500 dollars',\n", + " 'I would love to go to Japan but the tickets are 75 dollars',\n", + " \"I'd love to go to Oman but the tickets are 373 dollars\",\n", + " \"I'd love to go to Vietnam but the tickets are 613 dollars\"]" ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "5vn22xrLST0H" + }, + "source": [ + "Note that the previous snippet of code is equivalent of running\n", + "\n", + "```\n", + "textattack augment --recipe checklist --pct-words-to-swap .1 --transformations-per-example 5 --exclude-original --interactive\n", + "```\n", + "in command line.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "VqfmCKz0XY-Y" + }, + "source": [ + "\n", + "\n", + "\n", + "Here's another example of using `WordNetAugmenter`. In this scenario, we enable `enable_advanced_metrics` to acquire perplexity and USE score, and enable `high_yield` to generate more examples in the same running time:\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "l2b-4scuXvkA", + "outputId": "5a372fd2-226a-4970-a2c9-c09bf2af56c2" + }, + "source": [ + "from textattack.augmentation import WordNetAugmenter\n", + "\n", + "augmenter = WordNetAugmenter(\n", + " pct_words_to_swap=0.4,\n", + " transformations_per_example=5,\n", + " high_yield=True,\n", + " enable_advanced_metrics=True,\n", + ")\n", + "s = \"I'd love to go to Japan but the tickets are 500 dollars\"\n", + "results = augmenter.augment(s)\n", + "print(f\"Average Original Perplexity Score: {results[1]['avg_original_perplexity']}\\n\")\n", + "print(f\"Average Augment Perplexity Score: {results[1]['avg_attack_perplexity']}\\n\")\n", + "print(f\"Average Augment USE Score: {results[2]['avg_attack_use_score']}\\n\")\n", + "print(f\"Augmentations:\")\n", + "results[0]" + ], + "execution_count": 9, + "outputs": [ { - "cell_type": "markdown", - "metadata": { - "id": "VqfmCKz0XY-Y" - }, - "source": [ - "\n", - "\n", - "\n", - "Here's another example of using `WordNetAugmenter`. 
In this scenario, we enable `enable_advanced_metrics` to acquire perplexity and USE score, and enable `high_yield` to generate more examples in the same running time:\n" - ] + "output_type": "stream", + "name": "stderr", + "text": [ + "Token indices sequence length is longer than the specified maximum sequence length for this model (1091 > 1024). Running this sequence through the model will result in indexing errors\n" + ] }, { - "cell_type": "code", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "l2b-4scuXvkA", - "outputId": "5a372fd2-226a-4970-a2c9-c09bf2af56c2" - }, - "source": [ - "from textattack.augmentation import WordNetAugmenter\n", - "augmenter = WordNetAugmenter(pct_words_to_swap=0.4, transformations_per_example=5, high_yield=True, enable_advanced_metrics=True)\n", - "s = \"I'd love to go to Japan but the tickets are 500 dollars\"\n", - "results = augmenter.augment(s)\n", - "print(f\"Average Original Perplexity Score: {results[1]['avg_original_perplexity']}\\n\")\n", - "print(f\"Average Augment Perplexity Score: {results[1]['avg_attack_perplexity']}\\n\")\n", - "print(f\"Average Augment USE Score: {results[2]['avg_attack_use_score']}\\n\")\n", - "print(f\"Augmentations:\")\n", - "results[0]" - ], - "execution_count": 9, - "outputs": [ - { - "output_type": "stream", - "name": "stderr", - "text": [ - "Token indices sequence length is longer than the specified maximum sequence length for this model (1091 > 1024). Running this sequence through the model will result in indexing errors\n" - ] - }, - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Average Original Perplexity Score: 1.09\n", - "\n", - "Average Augment Perplexity Score: 3.17\n", - "\n", - "Average Augment USE Score: 0.72\n", - "\n", - "Augmentations:\n" - ] - }, - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "[\"I'd bang to operate to Japan but the ticket are 500 buck\",\n", - " \"I'd bang to plump to Nihon but the tickets are 500 clam\",\n", - " \"I'd bed to operate to Japan but the ticket are 500 buck\",\n", - " \"I'd bed to plump to Nihon but the tickets are 500 clam\",\n", - " \"I'd beloved to operate to Japan but the ticket are 500 buck\",\n", - " \"I'd beloved to plump to Nihon but the tickets are 500 clam\",\n", - " \"I'd bonk to operate to Japan but the ticket are 500 buck\",\n", - " \"I'd bonk to plump to Nihon but the tickets are 500 clam\",\n", - " \"I'd bonk to travel to Japan but the tag are 500 buck\",\n", - " \"I'd bonk to travel to Japan but the tag are 500 clam\",\n", - " \"I'd bonk to travel to Japan but the tag are 500 dollar\",\n", - " \"I'd bonk to travel to Japan but the tag are 500 dollars\",\n", - " \"I'd bonk to travel to Japan but the tag are D dollars\",\n", - " \"I'd bonk to travel to Japan but the tag are d dollars\",\n", - " \"I'd bonk to travel to Nihon but the tag are 500 dollars\",\n", - " \"I'd bonk to travel to Nippon but the tag are 500 dollars\",\n", - " \"I'd bonk to travel to japan but the tag are 500 dollars\",\n", - " \"I'd dear to operate to Japan but the ticket are 500 buck\",\n", - " \"I'd dear to plump to Nihon but the tickets are 500 clam\",\n", - " \"I'd dearest to operate to Japan but the ticket are 500 buck\",\n", - " \"I'd dearest to plump to Nihon but the tickets are 500 clam\",\n", - " \"I'd eff to operate to Japan but the ticket are 500 buck\",\n", - " \"I'd eff to plump to Nihon but the tickets are 500 clam\",\n", - " \"I'd enjoy to exit to Japan but the fine are 500 buck\",\n", - " \"I'd enjoy 
to exit to Japan but the slate are 500 buck\",\n", - " \"I'd enjoy to exit to Japan but the tag are 500 buck\",\n", - " \"I'd enjoy to exit to Japan but the ticket are 500 buck\",\n", - " \"I'd enjoy to exit to Japan but the tickets are 500 buck\",\n", - " \"I'd enjoy to exit to Japan but the tickets are D buck\",\n", - " \"I'd enjoy to exit to Japan but the tickets are d buck\",\n", - " \"I'd enjoy to exit to Nihon but the tickets are 500 buck\",\n", - " \"I'd enjoy to exit to Nippon but the tickets are 500 buck\",\n", - " \"I'd enjoy to exit to japan but the tickets are 500 buck\",\n", - " \"I'd enjoy to operate to Japan but the ticket are 500 buck\",\n", - " \"I'd enjoy to plump to Nihon but the tickets are 500 clam\",\n", - " \"I'd fuck to operate to Japan but the ticket are 500 buck\",\n", - " \"I'd fuck to plump to Nihon but the tickets are 500 clam\",\n", - " \"I'd honey to operate to Japan but the ticket are 500 buck\",\n", - " \"I'd honey to plump to Nihon but the tickets are 500 clam\",\n", - " \"I'd hump to operate to Japan but the ticket are 500 buck\",\n", - " \"I'd hump to plump to Nihon but the tickets are 500 clam\",\n", - " \"I'd jazz to operate to Japan but the ticket are 500 buck\",\n", - " \"I'd jazz to plump to Nihon but the tickets are 500 clam\",\n", - " \"I'd know to operate to Japan but the ticket are 500 buck\",\n", - " \"I'd know to plump to Nihon but the tickets are 500 clam\",\n", - " \"I'd love to operate to Japan but the ticket are 500 buck\",\n", - " \"I'd love to operate to Japan but the ticket are D buck\",\n", - " \"I'd love to operate to Japan but the ticket are d buck\",\n", - " \"I'd love to operate to Nihon but the ticket are 500 buck\",\n", - " \"I'd love to operate to Nippon but the ticket are 500 buck\",\n", - " \"I'd love to operate to japan but the ticket are 500 buck\",\n", - " \"I'd love to plump to Nihon but the fine are 500 clam\",\n", - " \"I'd love to plump to Nihon but the slate are 500 clam\",\n", - " \"I'd love to plump to Nihon but the tag are 500 clam\",\n", - " \"I'd love to plump to Nihon but the ticket are 500 clam\",\n", - " \"I'd love to plump to Nihon but the tickets are 500 clam\",\n", - " \"I'd love to plump to Nihon but the tickets are D clam\",\n", - " \"I'd love to plump to Nihon but the tickets are d clam\",\n", - " \"I'd lovemaking to operate to Japan but the ticket are 500 buck\",\n", - " \"I'd lovemaking to plump to Nihon but the tickets are 500 clam\",\n", - " \"I'd passion to fit to Japan but the fine are 500 buck\",\n", - " \"I'd passion to fit to Japan but the fine are 500 clam\",\n", - " \"I'd passion to fit to Japan but the fine are 500 dollar\",\n", - " \"I'd passion to fit to Japan but the fine are 500 dollars\",\n", - " \"I'd passion to fit to Japan but the fine are D dollars\",\n", - " \"I'd passion to fit to Japan but the fine are d dollars\",\n", - " \"I'd passion to fit to Nihon but the fine are 500 dollars\",\n", - " \"I'd passion to fit to Nippon but the fine are 500 dollars\",\n", - " \"I'd passion to fit to japan but the fine are 500 dollars\",\n", - " \"I'd passion to operate to Japan but the ticket are 500 buck\",\n", - " \"I'd passion to plump to Nihon but the tickets are 500 clam\",\n", - " \"I'd screw to operate to Japan but the ticket are 500 buck\",\n", - " \"I'd screw to plump to Nihon but the tickets are 500 clam\"]" - ] - }, - "metadata": {}, - "execution_count": 9 - } - ] + "output_type": "stream", + "name": "stdout", + "text": [ + "Average Original Perplexity Score: 1.09\n", + "\n", + 
"Average Augment Perplexity Score: 3.17\n", + "\n", + "Average Augment USE Score: 0.72\n", + "\n", + "Augmentations:\n" + ] }, { - "cell_type": "markdown", - "metadata": { - "id": "whvwbHLVEJ-S" - }, - "source": [ - "### Conclusion\n", - "We have now went through the basics in running `Augmenter` by either creating a new augmenter from scratch or using a pre-built augmenter. This could be done in as few as 4 lines of code so please give it a try if you haven't already! 🐙" + "output_type": "execute_result", + "data": { + "text/plain": [ + "[\"I'd bang to operate to Japan but the ticket are 500 buck\",\n", + " \"I'd bang to plump to Nihon but the tickets are 500 clam\",\n", + " \"I'd bed to operate to Japan but the ticket are 500 buck\",\n", + " \"I'd bed to plump to Nihon but the tickets are 500 clam\",\n", + " \"I'd beloved to operate to Japan but the ticket are 500 buck\",\n", + " \"I'd beloved to plump to Nihon but the tickets are 500 clam\",\n", + " \"I'd bonk to operate to Japan but the ticket are 500 buck\",\n", + " \"I'd bonk to plump to Nihon but the tickets are 500 clam\",\n", + " \"I'd bonk to travel to Japan but the tag are 500 buck\",\n", + " \"I'd bonk to travel to Japan but the tag are 500 clam\",\n", + " \"I'd bonk to travel to Japan but the tag are 500 dollar\",\n", + " \"I'd bonk to travel to Japan but the tag are 500 dollars\",\n", + " \"I'd bonk to travel to Japan but the tag are D dollars\",\n", + " \"I'd bonk to travel to Japan but the tag are d dollars\",\n", + " \"I'd bonk to travel to Nihon but the tag are 500 dollars\",\n", + " \"I'd bonk to travel to Nippon but the tag are 500 dollars\",\n", + " \"I'd bonk to travel to japan but the tag are 500 dollars\",\n", + " \"I'd dear to operate to Japan but the ticket are 500 buck\",\n", + " \"I'd dear to plump to Nihon but the tickets are 500 clam\",\n", + " \"I'd dearest to operate to Japan but the ticket are 500 buck\",\n", + " \"I'd dearest to plump to Nihon but the tickets are 500 clam\",\n", + " \"I'd eff to operate to Japan but the ticket are 500 buck\",\n", + " \"I'd eff to plump to Nihon but the tickets are 500 clam\",\n", + " \"I'd enjoy to exit to Japan but the fine are 500 buck\",\n", + " \"I'd enjoy to exit to Japan but the slate are 500 buck\",\n", + " \"I'd enjoy to exit to Japan but the tag are 500 buck\",\n", + " \"I'd enjoy to exit to Japan but the ticket are 500 buck\",\n", + " \"I'd enjoy to exit to Japan but the tickets are 500 buck\",\n", + " \"I'd enjoy to exit to Japan but the tickets are D buck\",\n", + " \"I'd enjoy to exit to Japan but the tickets are d buck\",\n", + " \"I'd enjoy to exit to Nihon but the tickets are 500 buck\",\n", + " \"I'd enjoy to exit to Nippon but the tickets are 500 buck\",\n", + " \"I'd enjoy to exit to japan but the tickets are 500 buck\",\n", + " \"I'd enjoy to operate to Japan but the ticket are 500 buck\",\n", + " \"I'd enjoy to plump to Nihon but the tickets are 500 clam\",\n", + " \"I'd fuck to operate to Japan but the ticket are 500 buck\",\n", + " \"I'd fuck to plump to Nihon but the tickets are 500 clam\",\n", + " \"I'd honey to operate to Japan but the ticket are 500 buck\",\n", + " \"I'd honey to plump to Nihon but the tickets are 500 clam\",\n", + " \"I'd hump to operate to Japan but the ticket are 500 buck\",\n", + " \"I'd hump to plump to Nihon but the tickets are 500 clam\",\n", + " \"I'd jazz to operate to Japan but the ticket are 500 buck\",\n", + " \"I'd jazz to plump to Nihon but the tickets are 500 clam\",\n", + " \"I'd know to operate to Japan but the 
ticket are 500 buck\",\n", + " \"I'd know to plump to Nihon but the tickets are 500 clam\",\n", + " \"I'd love to operate to Japan but the ticket are 500 buck\",\n", + " \"I'd love to operate to Japan but the ticket are D buck\",\n", + " \"I'd love to operate to Japan but the ticket are d buck\",\n", + " \"I'd love to operate to Nihon but the ticket are 500 buck\",\n", + " \"I'd love to operate to Nippon but the ticket are 500 buck\",\n", + " \"I'd love to operate to japan but the ticket are 500 buck\",\n", + " \"I'd love to plump to Nihon but the fine are 500 clam\",\n", + " \"I'd love to plump to Nihon but the slate are 500 clam\",\n", + " \"I'd love to plump to Nihon but the tag are 500 clam\",\n", + " \"I'd love to plump to Nihon but the ticket are 500 clam\",\n", + " \"I'd love to plump to Nihon but the tickets are 500 clam\",\n", + " \"I'd love to plump to Nihon but the tickets are D clam\",\n", + " \"I'd love to plump to Nihon but the tickets are d clam\",\n", + " \"I'd lovemaking to operate to Japan but the ticket are 500 buck\",\n", + " \"I'd lovemaking to plump to Nihon but the tickets are 500 clam\",\n", + " \"I'd passion to fit to Japan but the fine are 500 buck\",\n", + " \"I'd passion to fit to Japan but the fine are 500 clam\",\n", + " \"I'd passion to fit to Japan but the fine are 500 dollar\",\n", + " \"I'd passion to fit to Japan but the fine are 500 dollars\",\n", + " \"I'd passion to fit to Japan but the fine are D dollars\",\n", + " \"I'd passion to fit to Japan but the fine are d dollars\",\n", + " \"I'd passion to fit to Nihon but the fine are 500 dollars\",\n", + " \"I'd passion to fit to Nippon but the fine are 500 dollars\",\n", + " \"I'd passion to fit to japan but the fine are 500 dollars\",\n", + " \"I'd passion to operate to Japan but the ticket are 500 buck\",\n", + " \"I'd passion to plump to Nihon but the tickets are 500 clam\",\n", + " \"I'd screw to operate to Japan but the ticket are 500 buck\",\n", + " \"I'd screw to plump to Nihon but the tickets are 500 clam\"]" ] + }, + "metadata": {}, + "execution_count": 9 } - ] + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "whvwbHLVEJ-S" + }, + "source": [ + "### Conclusion\n", + "We have now went through the basics in running `Augmenter` by either creating a new augmenter from scratch or using a pre-built augmenter. This could be done in as few as 4 lines of code so please give it a try if you haven't already! 
🐙" + ] + } + ] } \ No newline at end of file diff --git a/docs/2notebook/4_Custom_Datasets_Word_Embedding.ipynb b/docs/2notebook/4_Custom_Datasets_Word_Embedding.ipynb index 6788b8d20..506b705ac 100644 --- a/docs/2notebook/4_Custom_Datasets_Word_Embedding.ipynb +++ b/docs/2notebook/4_Custom_Datasets_Word_Embedding.ipynb @@ -4,7 +4,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# TextAttack with Custom Dataset and Word Embedding.\n", + "# TextAttack with Custom Dataset and Word Embedding.\n", "\n", "This tutorial will show you how to use textattack with any dataset and word embedding you may want to use\n" ] @@ -291,7 +291,9 @@ "from textattack.models.wrappers import HuggingFaceModelWrapper\n", "\n", "# https://huggingface.co/textattack\n", - "model = transformers.AutoModelForSequenceClassification.from_pretrained(\"textattack/albert-base-v2-imdb\")\n", + "model = transformers.AutoModelForSequenceClassification.from_pretrained(\n", + " \"textattack/albert-base-v2-imdb\"\n", + ")\n", "tokenizer = transformers.AutoTokenizer.from_pretrained(\"textattack/albert-base-v2-imdb\")\n", "# We wrap the model so it can be used by textattack\n", "model_wrapper = HuggingFaceModelWrapper(model, tokenizer)" @@ -319,13 +321,13 @@ "outputs": [], "source": [ "# dataset: An iterable of (text, ground_truth_output) pairs.\n", - "#0 means the review is negative\n", - "#1 means the review is positive\n", + "# 0 means the review is negative\n", + "# 1 means the review is positive\n", "custom_dataset = [\n", - " ('I hate this movie', 0), #A negative comment, with a negative label\n", - " ('I hate this movie', 1), #A negative comment, with a positive label\n", - " ('I love this movie', 0), #A positive comment, with a negative label\n", - " ('I love this movie', 1), #A positive comment, with a positive label\n", + " (\"I hate this movie\", 0), # A negative comment, with a negative label\n", + " (\"I hate this movie\", 1), # A negative comment, with a positive label\n", + " (\"I love this movie\", 0), # A positive comment, with a negative label\n", + " (\"I love this movie\", 1), # A positive comment, with a positive label\n", "]" ] }, @@ -360,7 +362,10 @@ "source": [ "from textattack import Attack\n", "from textattack.search_methods import GreedySearch\n", - "from textattack.constraints.pre_transformation import RepeatModification, StopwordModification\n", + "from textattack.constraints.pre_transformation import (\n", + " RepeatModification,\n", + " StopwordModification,\n", + ")\n", "from textattack.goal_functions import UntargetedClassification\n", "from textattack.transformations import WordSwapEmbedding\n", "from textattack.constraints.pre_transformation import RepeatModification\n", @@ -369,10 +374,9 @@ "# We'll use untargeted classification as the goal function.\n", "goal_function = UntargetedClassification(model_wrapper)\n", "# We'll to use our WordSwapEmbedding as the attack transformation.\n", - "transformation = WordSwapEmbedding() \n", + "transformation = WordSwapEmbedding()\n", "# We'll constrain modification of already modified indices and stopwords\n", - "constraints = [RepeatModification(),\n", - " StopwordModification()]\n", + "constraints = [RepeatModification(), StopwordModification()]\n", "# We'll use the Greedy search method\n", "search_method = GreedySearch()\n", "# Now, let's make the attack from the 4 components:\n", @@ -429,7 +433,7 @@ "source": [ "for example, label in custom_dataset:\n", " result = attack.attack(example, label)\n", - " print(result.__str__(color_method='ansi'))" 
+ " print(result.__str__(color_method=\"ansi\"))" ] }, { @@ -453,10 +457,30 @@ "source": [ "from textattack.shared import WordEmbedding\n", "\n", - "embedding_matrix = [[1.0], [2.0], [3.0], [4.0]] #2-D array of shape N x D where N represents size of vocab and D is the dimension of embedding vectors.\n", - "word2index = {\"hate\":0, \"despise\":1, \"like\":2, \"love\":3} #dictionary that maps word to its index with in the embedding matrix.\n", - "index2word = {0:\"hate\", 1: \"despise\", 2:\"like\", 3:\"love\"} #dictionary that maps index to its word.\n", - "nn_matrix = [[0, 1, 2, 3], [1, 0, 2, 3], [2, 1, 3, 0], [3, 2, 1, 0]] #2-D integer array of shape N x K where N represents size of vocab and K is the top-K nearest neighbours.\n", + "embedding_matrix = [\n", + " [1.0],\n", + " [2.0],\n", + " [3.0],\n", + " [4.0],\n", + "] # 2-D array of shape N x D where N represents size of vocab and D is the dimension of embedding vectors.\n", + "word2index = {\n", + " \"hate\": 0,\n", + " \"despise\": 1,\n", + " \"like\": 2,\n", + " \"love\": 3,\n", + "} # dictionary that maps word to its index with in the embedding matrix.\n", + "index2word = {\n", + " 0: \"hate\",\n", + " 1: \"despise\",\n", + " 2: \"like\",\n", + " 3: \"love\",\n", + "} # dictionary that maps index to its word.\n", + "nn_matrix = [\n", + " [0, 1, 2, 3],\n", + " [1, 0, 2, 3],\n", + " [2, 1, 3, 0],\n", + " [3, 2, 1, 0],\n", + "] # 2-D integer array of shape N x K where N represents size of vocab and K is the top-K nearest neighbours.\n", "\n", "embedding = WordEmbedding(embedding_matrix, word2index, index2word, nn_matrix)" ] @@ -509,13 +533,13 @@ "source": [ "from textattack.attack_results import SuccessfulAttackResult\n", "\n", - "transformation = WordSwapEmbedding(3, embedding) \n", + "transformation = WordSwapEmbedding(3, embedding)\n", "\n", "attack = Attack(goal_function, constraints, transformation, search_method)\n", "\n", "for example, label in custom_dataset:\n", " result = attack.attack(example, label)\n", - " print(result.__str__(color_method='ansi'))" + " print(result.__str__(color_method=\"ansi\"))" ] } ], diff --git a/docs/2notebook/Example_0_tensorflow.ipynb b/docs/2notebook/Example_0_tensorflow.ipynb index f16aa295a..6c1fd55e6 100644 --- a/docs/2notebook/Example_0_tensorflow.ipynb +++ b/docs/2notebook/Example_0_tensorflow.ipynb @@ -232,22 +232,26 @@ "print(\"Version: \", tf.__version__)\n", "print(\"Eager mode: \", tf.executing_eagerly())\n", "print(\"Hub version: \", hub.__version__)\n", - "print(\"GPU is\", \"available\" if tf.config.list_physical_devices('GPU') else \"NOT AVAILABLE\")\n", + "print(\n", + " \"GPU is\", \"available\" if tf.config.list_physical_devices(\"GPU\") else \"NOT AVAILABLE\"\n", + ")\n", "\n", - "train_data, test_data = tfds.load(name=\"imdb_reviews\", split=[\"train\", \"test\"], \n", - " batch_size=-1, as_supervised=True)\n", + "train_data, test_data = tfds.load(\n", + " name=\"imdb_reviews\", split=[\"train\", \"test\"], batch_size=-1, as_supervised=True\n", + ")\n", "\n", "train_examples, train_labels = tfds.as_numpy(train_data)\n", "test_examples, test_labels = tfds.as_numpy(test_data)\n", "\n", "model = \"https://tfhub.dev/google/tf2-preview/gnews-swivel-20dim/1\"\n", - "hub_layer = hub.KerasLayer(model, output_shape=[20], input_shape=[], \n", - " dtype=tf.string, trainable=True)\n", + "hub_layer = hub.KerasLayer(\n", + " model, output_shape=[20], input_shape=[], dtype=tf.string, trainable=True\n", + ")\n", "hub_layer(train_examples[:3])\n", "\n", "model = tf.keras.Sequential()\n", 
"model.add(hub_layer)\n", - "model.add(tf.keras.layers.Dense(16, activation='relu'))\n", + "model.add(tf.keras.layers.Dense(16, activation=\"relu\"))\n", "model.add(tf.keras.layers.Dense(1))\n", "\n", "model.summary()\n", @@ -258,16 +262,20 @@ "y_val = train_labels[:10000]\n", "partial_y_train = train_labels[10000:]\n", "\n", - "model.compile(optimizer='adam',\n", - " loss=tf.losses.BinaryCrossentropy(from_logits=True),\n", - " metrics=['accuracy'])\n", + "model.compile(\n", + " optimizer=\"adam\",\n", + " loss=tf.losses.BinaryCrossentropy(from_logits=True),\n", + " metrics=[\"accuracy\"],\n", + ")\n", "\n", - "history = model.fit(partial_x_train,\n", - " partial_y_train,\n", - " epochs=40,\n", - " batch_size=512,\n", - " validation_data=(x_val, y_val),\n", - " verbose=1)" + "history = model.fit(\n", + " partial_x_train,\n", + " partial_y_train,\n", + " epochs=40,\n", + " batch_size=512,\n", + " validation_data=(x_val, y_val),\n", + " verbose=1,\n", + ")" ] }, { @@ -300,6 +308,7 @@ "\n", "from textattack.models.wrappers import ModelWrapper\n", "\n", + "\n", "class CustomTensorFlowModelWrapper(ModelWrapper):\n", " def __init__(self, model):\n", " self.model = model\n", @@ -312,8 +321,8 @@ " logits = logits.squeeze(dim=-1)\n", " # Since this model only has a single output (between 0 or 1),\n", " # we have to add the second dimension.\n", - " final_preds = torch.stack((1-logits, logits), dim=1)\n", - " return final_preds\n" + " final_preds = torch.stack((1 - logits, logits), dim=1)\n", + " return final_preds" ] }, { @@ -350,7 +359,7 @@ } ], "source": [ - "CustomTensorFlowModelWrapper(model)(['I hate you so much', 'I love you'])" + "CustomTensorFlowModelWrapper(model)([\"I hate you so much\", \"I love you\"])" ] }, { diff --git a/docs/2notebook/Example_1_sklearn.ipynb b/docs/2notebook/Example_1_sklearn.ipynb index b50d52982..7826f18ff 100644 --- a/docs/2notebook/Example_1_sklearn.ipynb +++ b/docs/2notebook/Example_1_sklearn.ipynb @@ -119,8 +119,9 @@ } ], "source": [ - "import nltk # the Natural Language Toolkit\n", - "nltk.download('punkt') # The NLTK tokenizer" + "import nltk # the Natural Language Toolkit\n", + "\n", + "nltk.download(\"punkt\") # The NLTK tokenizer" ] }, { @@ -259,102 +260,139 @@ "# Nice to see additional metrics\n", "from sklearn.metrics import classification_report\n", "\n", - "def load_data(dataset_split='train'):\n", - " dataset = datasets.load_dataset('rotten_tomatoes')[dataset_split]\n", + "\n", + "def load_data(dataset_split=\"train\"):\n", + " dataset = datasets.load_dataset(\"rotten_tomatoes\")[dataset_split]\n", " # Open and import positve data\n", " df = pd.DataFrame()\n", - " df['Review'] = [review['text'] for review in dataset]\n", - " df['Sentiment'] = [review['label'] for review in dataset]\n", + " df[\"Review\"] = [review[\"text\"] for review in dataset]\n", + " df[\"Sentiment\"] = [review[\"label\"] for review in dataset]\n", " # Remove non-alphanumeric characters\n", - " df['Review'] = df['Review'].apply(lambda x: re.sub(\"[^a-zA-Z]\", ' ', str(x)))\n", + " df[\"Review\"] = df[\"Review\"].apply(lambda x: re.sub(\"[^a-zA-Z]\", \" \", str(x)))\n", " # Tokenize the training and testing data\n", " df_tokenized = tokenize_review(df)\n", " return df_tokenized\n", "\n", + "\n", "def tokenize_review(df):\n", " # Tokenize Reviews in training\n", - " tokened_reviews = [word_tokenize(rev) for rev in df['Review']]\n", + " tokened_reviews = [word_tokenize(rev) for rev in df[\"Review\"]]\n", " # Create word stems\n", " stemmed_tokens = []\n", " porter = 
PorterStemmer()\n", " for i in range(len(tokened_reviews)):\n", " stems = [porter.stem(token) for token in tokened_reviews[i]]\n", - " stems = ' '.join(stems)\n", + " stems = \" \".join(stems)\n", " stemmed_tokens.append(stems)\n", - " df.insert(1, column='Stemmed', value=stemmed_tokens)\n", + " df.insert(1, column=\"Stemmed\", value=stemmed_tokens)\n", " return df\n", "\n", + "\n", "def transform_BOW(training, testing, column_name):\n", - " vect = CountVectorizer(max_features=100, ngram_range=(1,3), stop_words=ENGLISH_STOP_WORDS)\n", + " vect = CountVectorizer(\n", + " max_features=100, ngram_range=(1, 3), stop_words=ENGLISH_STOP_WORDS\n", + " )\n", " vectFit = vect.fit(training[column_name])\n", " BOW_training = vectFit.transform(training[column_name])\n", - " BOW_training_df = pd.DataFrame(BOW_training.toarray(), columns=vect.get_feature_names())\n", + " BOW_training_df = pd.DataFrame(\n", + " BOW_training.toarray(), columns=vect.get_feature_names()\n", + " )\n", " BOW_testing = vectFit.transform(testing[column_name])\n", - " BOW_testing_Df = pd.DataFrame(BOW_testing.toarray(), columns=vect.get_feature_names())\n", + " BOW_testing_Df = pd.DataFrame(\n", + " BOW_testing.toarray(), columns=vect.get_feature_names()\n", + " )\n", " return vectFit, BOW_training_df, BOW_testing_Df\n", "\n", + "\n", "def transform_tfidf(training, testing, column_name):\n", - " Tfidf = TfidfVectorizer(ngram_range=(1,3), max_features=100, stop_words=ENGLISH_STOP_WORDS)\n", + " Tfidf = TfidfVectorizer(\n", + " ngram_range=(1, 3), max_features=100, stop_words=ENGLISH_STOP_WORDS\n", + " )\n", " Tfidf_fit = Tfidf.fit(training[column_name])\n", " Tfidf_training = Tfidf_fit.transform(training[column_name])\n", - " Tfidf_training_df = pd.DataFrame(Tfidf_training.toarray(), columns=Tfidf.get_feature_names())\n", + " Tfidf_training_df = pd.DataFrame(\n", + " Tfidf_training.toarray(), columns=Tfidf.get_feature_names()\n", + " )\n", " Tfidf_testing = Tfidf_fit.transform(testing[column_name])\n", - " Tfidf_testing_df = pd.DataFrame(Tfidf_testing.toarray(), columns=Tfidf.get_feature_names())\n", + " Tfidf_testing_df = pd.DataFrame(\n", + " Tfidf_testing.toarray(), columns=Tfidf.get_feature_names()\n", + " )\n", " return Tfidf_fit, Tfidf_training_df, Tfidf_testing_df\n", "\n", + "\n", "def add_augmenting_features(df):\n", - " tokened_reviews = [word_tokenize(rev) for rev in df['Review']]\n", + " tokened_reviews = [word_tokenize(rev) for rev in df[\"Review\"]]\n", " # Create feature that measures length of reviews\n", " len_tokens = []\n", " for i in range(len(tokened_reviews)):\n", " len_tokens.append(len(tokened_reviews[i]))\n", " len_tokens = preprocessing.scale(len_tokens)\n", - " df.insert(0, column='Lengths', value=len_tokens)\n", + " df.insert(0, column=\"Lengths\", value=len_tokens)\n", "\n", " # Create average word length (training)\n", - " Average_Words = [len(x)/(len(x.split())) for x in df['Review'].tolist()]\n", + " Average_Words = [len(x) / (len(x.split())) for x in df[\"Review\"].tolist()]\n", " Average_Words = preprocessing.scale(Average_Words)\n", - " df['averageWords'] = Average_Words\n", + " df[\"averageWords\"] = Average_Words\n", " return df\n", "\n", + "\n", "def build_model(X_train, y_train, X_test, y_test, name_of_test):\n", " log_reg = LogisticRegression(C=30, max_iter=200).fit(X_train, y_train)\n", " y_pred = log_reg.predict(X_test)\n", - " print('Training accuracy of '+name_of_test+': ', log_reg.score(X_train, y_train))\n", - " print('Testing accuracy of '+name_of_test+': ', log_reg.score(X_test, 
y_test))\n", + " print(\n", + " \"Training accuracy of \" + name_of_test + \": \", log_reg.score(X_train, y_train)\n", + " )\n", + " print(\"Testing accuracy of \" + name_of_test + \": \", log_reg.score(X_test, y_test))\n", " print(classification_report(y_test, y_pred)) # Evaluating prediction ability\n", " return log_reg\n", "\n", + "\n", "# Load training and test sets\n", "# Loading reviews into DF\n", - "df_train = load_data('train')\n", + "df_train = load_data(\"train\")\n", "\n", - "print('...successfully loaded training data')\n", - "print('Total length of training data: ', len(df_train))\n", + "print(\"...successfully loaded training data\")\n", + "print(\"Total length of training data: \", len(df_train))\n", "# Add augmenting features\n", "df_train = add_augmenting_features(df_train)\n", - "print('...augmented data with len_tokens and average_words')\n", + "print(\"...augmented data with len_tokens and average_words\")\n", "\n", "# Load test DF\n", - "df_test = load_data('test')\n", + "df_test = load_data(\"test\")\n", "\n", - "print('...successfully loaded testing data')\n", - "print('Total length of testing data: ', len(df_test))\n", + "print(\"...successfully loaded testing data\")\n", + "print(\"Total length of testing data: \", len(df_test))\n", "df_test = add_augmenting_features(df_test)\n", - "print('...augmented data with len_tokens and average_words')\n", + "print(\"...augmented data with len_tokens and average_words\")\n", "\n", "# Create unstemmed BOW features for training set\n", - "unstemmed_BOW_vect_fit, df_train_bow_unstem, df_test_bow_unstem = transform_BOW(df_train, df_test, 'Review')\n", - "print('...successfully created the unstemmed BOW data')\n", + "unstemmed_BOW_vect_fit, df_train_bow_unstem, df_test_bow_unstem = transform_BOW(\n", + " df_train, df_test, \"Review\"\n", + ")\n", + "print(\"...successfully created the unstemmed BOW data\")\n", "\n", "# Create TfIdf features for training set\n", - "unstemmed_tfidf_vect_fit, df_train_tfidf_unstem, df_test_tfidf_unstem = transform_tfidf(df_train, df_test, 'Review')\n", - "print('...successfully created the unstemmed TFIDF data')\n", + "unstemmed_tfidf_vect_fit, df_train_tfidf_unstem, df_test_tfidf_unstem = transform_tfidf(\n", + " df_train, df_test, \"Review\"\n", + ")\n", + "print(\"...successfully created the unstemmed TFIDF data\")\n", "\n", "# Running logistic regression on dataframes\n", - "bow_unstemmed = build_model(df_train_bow_unstem, df_train['Sentiment'], df_test_bow_unstem, df_test['Sentiment'], 'BOW Unstemmed')\n", + "bow_unstemmed = build_model(\n", + " df_train_bow_unstem,\n", + " df_train[\"Sentiment\"],\n", + " df_test_bow_unstem,\n", + " df_test[\"Sentiment\"],\n", + " \"BOW Unstemmed\",\n", + ")\n", "\n", - "tfidf_unstemmed = build_model(df_train_tfidf_unstem, df_train['Sentiment'], df_test_tfidf_unstem, df_test['Sentiment'], 'TFIDF Unstemmed')" + "tfidf_unstemmed = build_model(\n", + " df_train_tfidf_unstem,\n", + " df_train[\"Sentiment\"],\n", + " df_test_tfidf_unstem,\n", + " df_test[\"Sentiment\"],\n", + " \"TFIDF Unstemmed\",\n", + ")" ] }, { diff --git a/docs/2notebook/Example_2_allennlp.ipynb b/docs/2notebook/Example_2_allennlp.ipynb index 87c1bd76b..d8050355a 100644 --- a/docs/2notebook/Example_2_allennlp.ipynb +++ b/docs/2notebook/Example_2_allennlp.ipynb @@ -2519,8 +2519,8 @@ "Requirement already satisfied: tensorboardX in /usr/local/lib/python3.7/dist-packages (from textattack[tensorflow]) (2.4)\n", "Collecting tensorflow-text>=2\n", " Downloading 
tensorflow_text-2.6.0-cp37-cp37m-manylinux1_x86_64.whl (4.4 MB)\n", - "\u001B[K |████████████████████████████████| 4.4 MB 5.4 MB/s \n", - "\u001B[?25hRequirement already satisfied: requests in /usr/local/lib/python3.7/dist-packages (from bert-score>=0.3.5->textattack[tensorflow]) (2.26.0)\n", + "\u001b[K |████████████████████████████████| 4.4 MB 5.4 MB/s \n", + "\u001b[?25hRequirement already satisfied: requests in /usr/local/lib/python3.7/dist-packages (from bert-score>=0.3.5->textattack[tensorflow]) (2.26.0)\n", "Requirement already satisfied: matplotlib in /usr/local/lib/python3.7/dist-packages (from bert-score>=0.3.5->textattack[tensorflow]) (3.2.2)\n", "Requirement already satisfied: packaging>=20.9 in /usr/local/lib/python3.7/dist-packages (from bert-score>=0.3.5->textattack[tensorflow]) (21.0)\n", "Requirement already satisfied: pyparsing>=2.0.2 in /usr/local/lib/python3.7/dist-packages (from packaging>=20.9->bert-score>=0.3.5->textattack[tensorflow]) (2.4.7)\n", @@ -2628,9 +2628,12 @@ "\n", "import textattack\n", "\n", + "\n", "class AllenNLPModel(textattack.models.wrappers.ModelWrapper):\n", " def __init__(self):\n", - " self.predictor = Predictor.from_path(\"https://storage.googleapis.com/allennlp-public-models/basic_stanford_sentiment_treebank-2020.06.09.tar.gz\")\n", + " self.predictor = Predictor.from_path(\n", + " \"https://storage.googleapis.com/allennlp-public-models/basic_stanford_sentiment_treebank-2020.06.09.tar.gz\"\n", + " )\n", " self.model = self.predictor._model\n", " self.tokenizer = self.predictor._dataset_reader._tokenizer\n", "\n", @@ -2639,10 +2642,11 @@ " for text_input in text_input_list:\n", " outputs.append(self.predictor.predict(sentence=text_input))\n", " # For each output, outputs['logits'] contains the logits where\n", - " # index 0 corresponds to the positive and index 1 corresponds \n", + " # index 0 corresponds to the positive and index 1 corresponds\n", " # to the negative score. 
We reverse the outputs (by reverse slicing,\n", " # [::-1]) so that negative comes first and positive comes second.\n", - " return [output['logits'][::-1] for output in outputs]\n", + " return [output[\"logits\"][::-1] for output in outputs]\n", + "\n", "\n", "model_wrapper = AllenNLPModel()" ], @@ -2911,7 +2915,7 @@ "output_type": "stream", "name": "stderr", "text": [ - "textattack: Loading \u001B[94mdatasets\u001B[0m dataset \u001B[94mglue\u001B[0m, subset \u001B[94msst2\u001B[0m, split \u001B[94mtrain\u001B[0m.\n", + "textattack: Loading \u001b[94mdatasets\u001b[0m dataset \u001b[94mglue\u001b[0m, subset \u001b[94msst2\u001b[0m, split \u001b[94mtrain\u001b[0m.\n", "textattack: Unknown if model of class compatible with goal function .\n" ] }, diff --git a/docs/2notebook/Example_3_Keras.ipynb b/docs/2notebook/Example_3_Keras.ipynb index b4df581f3..25bb73a9d 100644 --- a/docs/2notebook/Example_3_Keras.ipynb +++ b/docs/2notebook/Example_3_Keras.ipynb @@ -66,7 +66,7 @@ "from keras.layers import Flatten\n", "from keras.layers import Dropout\n", "\n", - "from nltk.tokenize import word_tokenize, RegexpTokenizer\n" + "from nltk.tokenize import word_tokenize, RegexpTokenizer" ] }, { @@ -99,7 +99,6 @@ } ], "source": [ - "\n", "NUM_WORDS = 1000\n", "\n", "(x_train_tokens, y_train), (x_test_tokens, y_test) = tf.keras.datasets.imdb.load_data(\n", @@ -110,19 +109,20 @@ " seed=113,\n", " start_char=1,\n", " oov_char=2,\n", - " index_from=3\n", + " index_from=3,\n", ")\n", "\n", + "\n", "def transform(x):\n", - " x_transform = []\n", - " for i, word_indices in enumerate(x):\n", - " BoW_array = np.zeros((NUM_WORDS,))\n", - " for index in word_indices:\n", - " if index < len(BoW_array):\n", - " BoW_array[index] += 1\n", - " x_transform.append(BoW_array)\n", - " return np.array(x_transform)\n", - " \n", + " x_transform = []\n", + " for i, word_indices in enumerate(x):\n", + " BoW_array = np.zeros((NUM_WORDS,))\n", + " for index in word_indices:\n", + " if index < len(BoW_array):\n", + " BoW_array[index] += 1\n", + " x_transform.append(BoW_array)\n", + " return np.array(x_transform)\n", + "\n", "\n", "index = int(0.9 * len(x_train_tokens))\n", "x_train = transform(x_train_tokens)[:index]\n", @@ -132,9 +132,7 @@ "y_train = to_categorical(y_train)\n", "y_test = to_categorical(y_test)\n", "\n", - "vocabulary = tf.keras.datasets.imdb.get_word_index(\n", - " path='imdb_word_index.json'\n", - ")" + "vocabulary = tf.keras.datasets.imdb.get_word_index(path=\"imdb_word_index.json\")" ] }, { @@ -202,30 +200,23 @@ } ], "source": [ - "#Model Created with Keras\n", + "# Model Created with Keras\n", "model = Sequential()\n", - "model.add(Dense(512, activation='relu', input_dim=NUM_WORDS))\n", + "model.add(Dense(512, activation=\"relu\", input_dim=NUM_WORDS))\n", "model.add(Dropout(0.3))\n", - "model.add(Dense(100, activation='relu'))\n", - "model.add(Dense(2, activation='sigmoid'))\n", + "model.add(Dense(100, activation=\"relu\"))\n", + "model.add(Dense(2, activation=\"sigmoid\"))\n", "opt = keras.optimizers.Adam(learning_rate=0.00001)\n", "\n", - "model.compile(\n", - " optimizer = opt,\n", - " loss = \"binary_crossentropy\",\n", - " metrics = [\"accuracy\"]\n", - ")\n", + "model.compile(optimizer=opt, loss=\"binary_crossentropy\", metrics=[\"accuracy\"])\n", "\n", "\n", "results = model.fit(\n", - " x_train, y_train,\n", - " epochs= 18,\n", - " batch_size = 512,\n", - " validation_data = (x_test, y_test)\n", + " x_train, y_train, epochs=18, batch_size=512, validation_data=(x_test, y_test)\n", ")\n", "\n", "\n", - 
"print(results.history)\n" + "print(results.history)" ] }, { @@ -268,19 +259,18 @@ " self.model = model\n", "\n", " def __call__(self, text_input_list):\n", - " \n", - " x_transform = []\n", - " for i, review in enumerate(text_input_list):\n", - " tokens = [x.strip(\",\") for x in review.split()]\n", - " BoW_array = np.zeros((NUM_WORDS,))\n", - " for word in tokens:\n", - " if word in vocabulary:\n", - " if vocabulary[word] < len(BoW_array):\n", - " BoW_array[vocabulary[word]] += 1 \n", - " x_transform.append(BoW_array)\n", - " x_transform = np.array(x_transform)\n", - " prediction = self.model.predict(x_transform)\n", - " return prediction\n", + " x_transform = []\n", + " for i, review in enumerate(text_input_list):\n", + " tokens = [x.strip(\",\") for x in review.split()]\n", + " BoW_array = np.zeros((NUM_WORDS,))\n", + " for word in tokens:\n", + " if word in vocabulary:\n", + " if vocabulary[word] < len(BoW_array):\n", + " BoW_array[vocabulary[word]] += 1\n", + " x_transform.append(BoW_array)\n", + " x_transform = np.array(x_transform)\n", + " prediction = self.model.predict(x_transform)\n", + " return prediction\n", "\n", "\n", "CustomKerasModelWrapper(model)([\"bad bad bad bad bad\", \"good good good good\"])" diff --git a/docs/2notebook/Example_4_CamemBERT.ipynb b/docs/2notebook/Example_4_CamemBERT.ipynb index 04744625c..83268dd96 100644 --- a/docs/2notebook/Example_4_CamemBERT.ipynb +++ b/docs/2notebook/Example_4_CamemBERT.ipynb @@ -50,32 +50,35 @@ "\n", "# Quiet TensorFlow.\n", "import os\n", + "\n", "if \"TF_CPP_MIN_LOG_LEVEL\" not in os.environ:\n", " os.environ[\"TF_CPP_MIN_LOG_LEVEL\"] = \"3\"\n", "\n", "\n", "class HuggingFaceSentimentAnalysisPipelineWrapper(ModelWrapper):\n", - " \"\"\" Transformers sentiment analysis pipeline returns a list of responses\n", - " like \n", - " \n", - " [{'label': 'POSITIVE', 'score': 0.7817379832267761}]\n", - " \n", - " We need to convert that to a format TextAttack understands, like\n", - " \n", - " [[0.218262017, 0.7817379832267761]\n", + " \"\"\"Transformers sentiment analysis pipeline returns a list of responses\n", + " like\n", + "\n", + " [{'label': 'POSITIVE', 'score': 0.7817379832267761}]\n", + "\n", + " We need to convert that to a format TextAttack understands, like\n", + "\n", + " [[0.218262017, 0.7817379832267761]\n", " \"\"\"\n", + "\n", " def __init__(self, model):\n", - " self.model = model#pipeline = pipeline\n", + " self.model = model # pipeline = pipeline\n", + "\n", " def __call__(self, text_inputs):\n", " raw_outputs = self.model(text_inputs)\n", " outputs = []\n", " for output in raw_outputs:\n", - " score = output['score']\n", - " if output['label'] == 'POSITIVE':\n", - " outputs.append([1-score, score])\n", + " score = output[\"score\"]\n", + " if output[\"label\"] == \"POSITIVE\":\n", + " outputs.append([1 - score, score])\n", " else:\n", - " outputs.append([score, 1-score])\n", - " return np.array(outputs)\n" + " outputs.append([score, 1 - score])\n", + " return np.array(outputs)" ] }, { @@ -581,7 +584,7 @@ "# see https://github.com/TheophileBlard/french-sentiment-analysis-with-bert\n", "model = TFAutoModelForSequenceClassification.from_pretrained(\"tblard/tf-allocine\")\n", "tokenizer = AutoTokenizer.from_pretrained(\"tblard/tf-allocine\")\n", - "pipeline = pipeline('sentiment-analysis', model=model, tokenizer=tokenizer)\n", + "pipeline = pipeline(\"sentiment-analysis\", model=model, tokenizer=tokenizer)\n", "\n", "model_wrapper = HuggingFaceSentimentAnalysisPipelineWrapper(pipeline)\n", "\n", @@ -590,15 +593,15 @@ 
"#\n", "# WordNet defaults to english. Set the default language to French ('fra')\n", "#\n", - "# See \"Building a free French wordnet from multilingual resources\", \n", - "# E. L. R. A. (ELRA) (ed.), \n", + "# See \"Building a free French wordnet from multilingual resources\",\n", + "# E. L. R. A. (ELRA) (ed.),\n", "# Proceedings of the Sixth International Language Resources and Evaluation (LREC’08).\n", - "recipe.transformation.language = 'fra'\n", + "recipe.transformation.language = \"fra\"\n", "\n", - "dataset = HuggingFaceDataset('allocine', split='test')\n", + "dataset = HuggingFaceDataset(\"allocine\", split=\"test\")\n", "\n", "attacker = Attacker(recipe, dataset)\n", - "attacker.attack_dataset()\n" + "attacker.attack_dataset()" ] } ], diff --git a/docs/2notebook/Example_5_Explain_BERT.ipynb b/docs/2notebook/Example_5_Explain_BERT.ipynb index f56e398d2..2e2110778 100644 --- a/docs/2notebook/Example_5_Explain_BERT.ipynb +++ b/docs/2notebook/Example_5_Explain_BERT.ipynb @@ -87,7 +87,7 @@ } ], "source": [ - "#Optional: Install dependency CAptum\n", + "# Optional: Install dependency CAptum\n", "!pip3 install captum" ] }, @@ -99,7 +99,14 @@ }, "outputs": [], "source": [ - "from captum.attr import IntegratedGradients, LayerConductance, LayerIntegratedGradients, LayerDeepLiftShap, InternalInfluence, LayerGradientXActivation\n", + "from captum.attr import (\n", + " IntegratedGradients,\n", + " LayerConductance,\n", + " LayerIntegratedGradients,\n", + " LayerDeepLiftShap,\n", + " InternalInfluence,\n", + " LayerGradientXActivation,\n", + ")\n", "from captum.attr import visualization as viz" ] }, @@ -125,9 +132,9 @@ "source": [ "if torch.cuda.is_available():\n", " device = torch.device(\"cuda:0\")\n", - "else: \n", + "else:\n", " device = torch.device(\"cpu\")\n", - " \n", + "\n", "print(device)" ] }, @@ -245,9 +252,13 @@ ], "source": [ "dataset = HuggingFaceDataset(\"ag_news\", None, \"train\")\n", - "original_model = AutoModelForSequenceClassification.from_pretrained(\"textattack/bert-base-uncased-ag-news\")\n", - "original_tokenizer = AutoTokenizer.from_pretrained(\"textattack/bert-base-uncased-ag-news\")\n", - "model = HuggingFaceModelWrapper(original_model,original_tokenizer)" + "original_model = AutoModelForSequenceClassification.from_pretrained(\n", + " \"textattack/bert-base-uncased-ag-news\"\n", + ")\n", + "original_tokenizer = AutoTokenizer.from_pretrained(\n", + " \"textattack/bert-base-uncased-ag-news\"\n", + ")\n", + "model = HuggingFaceModelWrapper(original_model, original_tokenizer)" ] }, { @@ -258,7 +269,7 @@ }, "outputs": [], "source": [ - "def get_text(tokenizer,input_ids,token_type_ids,attention_mask):\n", + "def get_text(tokenizer, input_ids, token_type_ids, attention_mask):\n", " list_of_text = []\n", " number = input_ids.size()[0]\n", " for i in range(number):\n", @@ -268,35 +279,44 @@ " txt = tokenizer.decode(ii, skip_special_tokens=True)\n", " list_of_text.append(txt)\n", " return list_of_text\n", - " \n", - "sel =2\n", - "batch_encoded = model.tokenizer([dataset[i][0]['text'] for i in range(sel)], padding=True, return_tensors=\"pt\")\n", + "\n", + "\n", + "sel = 2\n", + "batch_encoded = model.tokenizer(\n", + " [dataset[i][0][\"text\"] for i in range(sel)], padding=True, return_tensors=\"pt\"\n", + ")\n", "batch_encoded.to(device)\n", "labels = [dataset[i][1] for i in range(sel)]\n", "\n", "clone = deepcopy(model)\n", "clone.model.to(device)\n", "\n", - "def calculate(input_ids,token_type_ids,attention_mask):\n", - " #convert back to list of text\n", - " return 
clone.model(input_ids,token_type_ids,attention_mask)[0]\n", - " \n", - "# x = calculate(**batch_encoded) \n", + "\n", + "def calculate(input_ids, token_type_ids, attention_mask):\n", + " # convert back to list of text\n", + " return clone.model(input_ids, token_type_ids, attention_mask)[0]\n", + "\n", + "\n", + "# x = calculate(**batch_encoded)\n", "\n", "lig = LayerIntegratedGradients(calculate, clone.model.bert.embeddings)\n", "# lig = InternalInfluence(calculate, clone.model.bert.embeddings)\n", "# lig = LayerGradientXActivation(calculate, clone.model.bert.embeddings)\n", "\n", - "bsl = torch.zeros(batch_encoded['input_ids'].size()).type(torch.LongTensor).to(device)\n", + "bsl = torch.zeros(batch_encoded[\"input_ids\"].size()).type(torch.LongTensor).to(device)\n", "labels = torch.tensor(labels).to(device)\n", "\n", - "attributions,delta = lig.attribute(inputs=batch_encoded['input_ids'],\n", - " baselines=bsl,\n", - " additional_forward_args=(batch_encoded['token_type_ids'], batch_encoded['attention_mask']),\n", - " n_steps = 10,\n", - " target = labels,\n", - " return_convergence_delta=True\n", - " )\n", + "attributions, delta = lig.attribute(\n", + " inputs=batch_encoded[\"input_ids\"],\n", + " baselines=bsl,\n", + " additional_forward_args=(\n", + " batch_encoded[\"token_type_ids\"],\n", + " batch_encoded[\"attention_mask\"],\n", + " ),\n", + " n_steps=10,\n", + " target=labels,\n", + " return_convergence_delta=True,\n", + ")\n", "atts = attributions.sum(dim=-1).squeeze(0)\n", "atts = atts / torch.norm(atts)" ] @@ -334,6 +354,7 @@ ], "source": [ "from textattack.attack_recipes import PWWSRen2019\n", + "\n", "attack = PWWSRen2019.build(model)" ] }, diff --git a/docs/2notebook/Example_6_Chinese_Attack.ipynb b/docs/2notebook/Example_6_Chinese_Attack.ipynb index b032306c7..66e93918f 100644 --- a/docs/2notebook/Example_6_Chinese_Attack.ipynb +++ b/docs/2notebook/Example_6_Chinese_Attack.ipynb @@ -1,2258 +1,3073 @@ { - "nbformat": 4, - "nbformat_minor": 0, - "metadata": { - "colab": { - "provenance": [] - }, - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.8" - }, - "widgets": { - "application/vnd.jupyter.widget-state+json": { - "4b423038915e40158f9da4c07d09aad3": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HBoxModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_3711cf0a18994cee8fc840d9a93cf5d3", - "IPY_MODEL_7f77bd7b8e5f45ae94cfc45f915c0c72", - "IPY_MODEL_fe0ca6138bc54b628c03e590c6e96aed" - ], - "layout": "IPY_MODEL_8b39363f69eb46009c5357263a65248c" - } - }, - "3711cf0a18994cee8fc840d9a93cf5d3": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - 
"_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_6b976fd913584da69456c1b6d53483cb", - "placeholder": "​", - "style": "IPY_MODEL_ea568ab2407f474da3b1f1b2540fa3a8", - "value": "Downloading: 100%" - } - }, - "7f77bd7b8e5f45ae94cfc45f915c0c72": { - "model_module": "@jupyter-widgets/controls", - "model_name": "FloatProgressModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_ff6b34a7e75b443593f3dca5d050cd52", - "max": 615, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_4f31972fd2fd44bbac063bb4b5075e98", - "value": 615 - } - }, - "fe0ca6138bc54b628c03e590c6e96aed": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_7de1551891ec447ab6d80ea1de145f16", - "placeholder": "​", - "style": "IPY_MODEL_e5e2c0507c834887b80f5717c1e6d5f3", - "value": " 615/615 [00:00<00:00, 33.8kB/s]" - } - }, - "8b39363f69eb46009c5357263a65248c": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "6b976fd913584da69456c1b6d53483cb": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - 
"grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "ea568ab2407f474da3b1f1b2540fa3a8": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "ff6b34a7e75b443593f3dca5d050cd52": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "4f31972fd2fd44bbac063bb4b5075e98": { - "model_module": "@jupyter-widgets/controls", - "model_name": "ProgressStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "7de1551891ec447ab6d80ea1de145f16": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, 
- "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "e5e2c0507c834887b80f5717c1e6d5f3": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "588b1321a9274de6a8a9e86622d90be4": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HBoxModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_2436b07259a34ee18fe9c1007f7b615b", - "IPY_MODEL_98aac5a0baee4930bd461f2c5fd73f4a", - "IPY_MODEL_34607a8556794a5a86c18abe5bd7e5a5" - ], - "layout": "IPY_MODEL_f78f6701ce4f4b3b9ff0af925620f261" - } - }, - "2436b07259a34ee18fe9c1007f7b615b": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_a1e3fb5cceed4e95957a17192a641b69", - "placeholder": "​", - "style": "IPY_MODEL_83e9b14c4d354fdc80db4f8a881f19f3", - "value": "Downloading: 100%" - } - }, - "98aac5a0baee4930bd461f2c5fd73f4a": { - "model_module": "@jupyter-widgets/controls", - "model_name": "FloatProgressModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_5f5457f292284dd8b914f45e26b2f749", - "max": 1115590446, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_2bb72191846f49528663680a315d8b01", - "value": 1115590446 - } - }, - "34607a8556794a5a86c18abe5bd7e5a5": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": 
"IPY_MODEL_83eff532314e4edcbfe648b321e9a310", - "placeholder": "​", - "style": "IPY_MODEL_3d30e700d32443fdb37b5ab934d2d70a", - "value": " 1.04G/1.04G [00:25<00:00, 45.4MB/s]" - } - }, - "f78f6701ce4f4b3b9ff0af925620f261": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "a1e3fb5cceed4e95957a17192a641b69": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "83e9b14c4d354fdc80db4f8a881f19f3": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "5f5457f292284dd8b914f45e26b2f749": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - 
"align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "2bb72191846f49528663680a315d8b01": { - "model_module": "@jupyter-widgets/controls", - "model_name": "ProgressStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "83eff532314e4edcbfe648b321e9a310": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "3d30e700d32443fdb37b5ab934d2d70a": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "a132f09845a54cbe865cbe8159bb693e": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HBoxModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_0af0e1eaea2f48c5b0fec6e550bd1baa", - 
"IPY_MODEL_dd6b0a5d9db245338a8fdb2ef5b29bf9", - "IPY_MODEL_58fc309041b54e94ae265167fa20d8d7" - ], - "layout": "IPY_MODEL_89dfd3fdc41e417a870901bc79e47495" - } - }, - "0af0e1eaea2f48c5b0fec6e550bd1baa": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_21472d1c4c8b494a8d3660b3320e9d4b", - "placeholder": "​", - "style": "IPY_MODEL_7511bb9ca5424674bb2350dff63c468a", - "value": "Downloading: 100%" - } - }, - "dd6b0a5d9db245338a8fdb2ef5b29bf9": { - "model_module": "@jupyter-widgets/controls", - "model_name": "FloatProgressModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_f6dd2c2cb4e346fe9af7026b5d2162e9", - "max": 5069051, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_a34ad57624fc422aa4832db3963298e6", - "value": 5069051 - } - }, - "58fc309041b54e94ae265167fa20d8d7": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_5167daffe92e44d2acc2af2d9b9738df", - "placeholder": "​", - "style": "IPY_MODEL_acbfb34a353f41649675bd104069d14e", - "value": " 4.83M/4.83M [00:00<00:00, 12.1MB/s]" - } - }, - "89dfd3fdc41e417a870901bc79e47495": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "21472d1c4c8b494a8d3660b3320e9d4b": { - "model_module": 
"@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "7511bb9ca5424674bb2350dff63c468a": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "f6dd2c2cb4e346fe9af7026b5d2162e9": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "a34ad57624fc422aa4832db3963298e6": { - "model_module": "@jupyter-widgets/controls", - "model_name": "ProgressStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "5167daffe92e44d2acc2af2d9b9738df": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - 
"_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "acbfb34a353f41649675bd104069d14e": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "be070cb4a1624b0bb8f9b594c6b951a5": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HBoxModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_2edb7130713d4e10a07bbf808abb9771", - "IPY_MODEL_5ae4c618f75d4ef9b65e5020fccb6d72", - "IPY_MODEL_138d8260e67f4bc58106b9b42f7abd12" - ], - "layout": "IPY_MODEL_d7621b5c619a4ce38ebe63924374cf78" - } - }, - "2edb7130713d4e10a07bbf808abb9771": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_1b208b6df75f4a9e97faa4e3705a9442", - "placeholder": "​", - "style": "IPY_MODEL_a7871b8ec3ec40e7bbbe6a5f40b79f4a", - "value": "Downloading: 100%" - } - }, - "5ae4c618f75d4ef9b65e5020fccb6d72": { - "model_module": "@jupyter-widgets/controls", - "model_name": "FloatProgressModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_aeb7ee752d834b4cbaa189419fd75dd4", - "max": 9096718, - "min": 0, - "orientation": "horizontal", - "style": 
"IPY_MODEL_b47dfff73e73410aa89f65e3c5b0c366", - "value": 9096718 - } - }, - "138d8260e67f4bc58106b9b42f7abd12": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_bdf3571e59ef4a688ab89d4badda27b1", - "placeholder": "​", - "style": "IPY_MODEL_d3bab427b92144d6b9ce96eac18ceb89", - "value": " 8.68M/8.68M [00:00<00:00, 16.8MB/s]" - } - }, - "d7621b5c619a4ce38ebe63924374cf78": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "1b208b6df75f4a9e97faa4e3705a9442": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "a7871b8ec3ec40e7bbbe6a5f40b79f4a": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - 
"_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "aeb7ee752d834b4cbaa189419fd75dd4": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "b47dfff73e73410aa89f65e3c5b0c366": { - "model_module": "@jupyter-widgets/controls", - "model_name": "ProgressStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "bdf3571e59ef4a688ab89d4badda27b1": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "d3bab427b92144d6b9ce96eac18ceb89": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": 
"StyleView", - "description_width": "" - } - } - } - } + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "provenance": [] }, - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "id": "m83IiqVREJ96" - }, - "source": [ - "# Chinese Attack" - ] + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.8" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "4b423038915e40158f9da4c07d09aad3": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_3711cf0a18994cee8fc840d9a93cf5d3", + "IPY_MODEL_7f77bd7b8e5f45ae94cfc45f915c0c72", + "IPY_MODEL_fe0ca6138bc54b628c03e590c6e96aed" + ], + "layout": "IPY_MODEL_8b39363f69eb46009c5357263a65248c" + } }, - { - "cell_type": "markdown", - "metadata": { - "id": "6UZ0d84hEJ98" - }, - "source": [ - "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/QData/TextAttack/blob/master/docs/2notebook/Example_6_Chinese%20Attack.ipynb)\n", - "\n", - "\n", - "[![View Source on GitHub](https://img.shields.io/badge/github-view%20source-black.svg)](https://github.com/QData/TextAttack/blob/master/docs/2notebook/Example_6_Chinese%20Attack.ipynb)" - ] + "3711cf0a18994cee8fc840d9a93cf5d3": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_6b976fd913584da69456c1b6d53483cb", + "placeholder": "​", + "style": "IPY_MODEL_ea568ab2407f474da3b1f1b2540fa3a8", + "value": "Downloading: 100%" + } }, - { - "cell_type": "markdown", - "metadata": { - "id": "tjqc2c5_7YaX" - }, - "source": [ - " Please remember to run the following in your notebook enviroment before running the tutorial codes:\n", - "\n", - "```\n", - "pip3 install textattack[tensorflow]\n", - "```\n", - "\n", - "\n", - "\n" - ] + "7f77bd7b8e5f45ae94cfc45f915c0c72": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_ff6b34a7e75b443593f3dca5d050cd52", + "max": 615, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_4f31972fd2fd44bbac063bb4b5075e98", + "value": 615 + } }, 
- { - "cell_type": "markdown", - "metadata": { - "id": "qZ5xnoevEJ99" - }, - "source": [ - "With a few additional modifications to the standard TextAttack commands, lanaguage models in Chinese can be attacked just as English models. Four transformations are available for either Chinese attack or augmentation:\n", - "\n", - "1. **ChineseHomophoneCharacterSwap**: transforms an input by replacing its words with substitions that share similar/identical pronounciation.\n", - "2. **ChineseMorphonymCharacterSwap**: transforms an input by replacing its words with substitions that share similar glyph structures.\n", - "3. **ChineseWordSwapHowNet**: transforms an input by replacing its words with synonyms provided by [OpenHownet](http://nlp.csai.tsinghua.edu.cn/).\n", - "4. **ChineseWordSwapMaskedLM**: transforms an input with potential replacements using a masked language model." - ] + "fe0ca6138bc54b628c03e590c6e96aed": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_7de1551891ec447ab6d80ea1de145f16", + "placeholder": "​", + "style": "IPY_MODEL_e5e2c0507c834887b80f5717c1e6d5f3", + "value": " 615/615 [00:00<00:00, 33.8kB/s]" + } }, - { - "cell_type": "markdown", - "source": [ - "We begin with imports:" + "8b39363f69eb46009c5357263a65248c": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "6b976fd913584da69456c1b6d53483cb": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + 
"grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "ea568ab2407f474da3b1f1b2540fa3a8": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "ff6b34a7e75b443593f3dca5d050cd52": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "4f31972fd2fd44bbac063bb4b5075e98": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "7de1551891ec447ab6d80ea1de145f16": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + 
"grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "e5e2c0507c834887b80f5717c1e6d5f3": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "588b1321a9274de6a8a9e86622d90be4": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_2436b07259a34ee18fe9c1007f7b615b", + "IPY_MODEL_98aac5a0baee4930bd461f2c5fd73f4a", + "IPY_MODEL_34607a8556794a5a86c18abe5bd7e5a5" ], - "metadata": { - "id": "2EP1DJylSfkD" - } + "layout": "IPY_MODEL_f78f6701ce4f4b3b9ff0af925620f261" + } }, - { - "cell_type": "code", - "metadata": { - "id": "5AXyxiLD4X93" - }, - "source": [ - "# Import required packages\n", - "import transformers\n", - "import string\n", - "import os\n", - "import pandas as pd\n", - "import datasets\n", - "\n", - "# Import classes required to build an Attacker\n", - "from textattack.models.wrappers import HuggingFaceModelWrapper\n", - "from textattack.search_methods import GreedyWordSwapWIR\n", - "from textattack.constraints.pre_transformation import RepeatModification, StopwordModification\n", - "from textattack.goal_functions import UntargetedClassification\n", - "\n", - "from textattack import Attack, Attacker, AttackArgs\n", - "from textattack.loggers import CSVLogger\n", - "from textattack.datasets import Dataset, HuggingFaceDataset\n", - "\n", - "# Import optional MUSE for higher quality examples\n", - "from textattack.constraints.semantics.sentence_encoders import MultilingualUniversalSentenceEncoder\n", - "muse = MultilingualUniversalSentenceEncoder(\n", - " threshold=0.9,\n", - " metric=\"cosine\",\n", - " compare_against_original=True,\n", - " window_size=15,\n", - " skip_text_shorter_than_window=True,\n", - ")\n", - "\n", - "# Import the transformations\n", - "\n", - "from textattack.transformations import CompositeTransformation\n", - "from textattack.transformations import ChineseWordSwapMaskedLM\n", - "from textattack.transformations import ChineseMorphonymCharacterSwap\n", - "from textattack.transformations import ChineseWordSwapHowNet\n", - "from textattack.transformations import ChineseHomophoneCharacterSwap" + "2436b07259a34ee18fe9c1007f7b615b": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + 
"_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_a1e3fb5cceed4e95957a17192a641b69", + "placeholder": "​", + "style": "IPY_MODEL_83e9b14c4d354fdc80db4f8a881f19f3", + "value": "Downloading: 100%" + } + }, + "98aac5a0baee4930bd461f2c5fd73f4a": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_5f5457f292284dd8b914f45e26b2f749", + "max": 1115590446, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_2bb72191846f49528663680a315d8b01", + "value": 1115590446 + } + }, + "34607a8556794a5a86c18abe5bd7e5a5": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_83eff532314e4edcbfe648b321e9a310", + "placeholder": "​", + "style": "IPY_MODEL_3d30e700d32443fdb37b5ab934d2d70a", + "value": " 1.04G/1.04G [00:25<00:00, 45.4MB/s]" + } + }, + "f78f6701ce4f4b3b9ff0af925620f261": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "a1e3fb5cceed4e95957a17192a641b69": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": 
null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "83e9b14c4d354fdc80db4f8a881f19f3": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "5f5457f292284dd8b914f45e26b2f749": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "2bb72191846f49528663680a315d8b01": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "83eff532314e4edcbfe648b321e9a310": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": 
null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "3d30e700d32443fdb37b5ab934d2d70a": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "a132f09845a54cbe865cbe8159bb693e": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_0af0e1eaea2f48c5b0fec6e550bd1baa", + "IPY_MODEL_dd6b0a5d9db245338a8fdb2ef5b29bf9", + "IPY_MODEL_58fc309041b54e94ae265167fa20d8d7" ], - "execution_count": null, - "outputs": [] + "layout": "IPY_MODEL_89dfd3fdc41e417a870901bc79e47495" + } }, - { - "cell_type": "markdown", - "source": [ - "Models and datasets would also need to be set up:" + "0af0e1eaea2f48c5b0fec6e550bd1baa": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_21472d1c4c8b494a8d3660b3320e9d4b", + "placeholder": "​", + "style": "IPY_MODEL_7511bb9ca5424674bb2350dff63c468a", + "value": "Downloading: 100%" + } + }, + "dd6b0a5d9db245338a8fdb2ef5b29bf9": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_f6dd2c2cb4e346fe9af7026b5d2162e9", + "max": 5069051, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_a34ad57624fc422aa4832db3963298e6", + "value": 5069051 + } + }, + "58fc309041b54e94ae265167fa20d8d7": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": 
"1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_5167daffe92e44d2acc2af2d9b9738df", + "placeholder": "​", + "style": "IPY_MODEL_acbfb34a353f41649675bd104069d14e", + "value": " 4.83M/4.83M [00:00<00:00, 12.1MB/s]" + } + }, + "89dfd3fdc41e417a870901bc79e47495": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "21472d1c4c8b494a8d3660b3320e9d4b": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "7511bb9ca5424674bb2350dff63c468a": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "f6dd2c2cb4e346fe9af7026b5d2162e9": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + 
"_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "a34ad57624fc422aa4832db3963298e6": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "5167daffe92e44d2acc2af2d9b9738df": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "acbfb34a353f41649675bd104069d14e": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "be070cb4a1624b0bb8f9b594c6b951a5": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + 
"_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_2edb7130713d4e10a07bbf808abb9771", + "IPY_MODEL_5ae4c618f75d4ef9b65e5020fccb6d72", + "IPY_MODEL_138d8260e67f4bc58106b9b42f7abd12" ], - "metadata": { - "id": "1mSvCqhHSi0h" - } + "layout": "IPY_MODEL_d7621b5c619a4ce38ebe63924374cf78" + } + }, + "2edb7130713d4e10a07bbf808abb9771": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_1b208b6df75f4a9e97faa4e3705a9442", + "placeholder": "​", + "style": "IPY_MODEL_a7871b8ec3ec40e7bbbe6a5f40b79f4a", + "value": "Downloading: 100%" + } + }, + "5ae4c618f75d4ef9b65e5020fccb6d72": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_aeb7ee752d834b4cbaa189419fd75dd4", + "max": 9096718, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_b47dfff73e73410aa89f65e3c5b0c366", + "value": 9096718 + } + }, + "138d8260e67f4bc58106b9b42f7abd12": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_bdf3571e59ef4a688ab89d4badda27b1", + "placeholder": "​", + "style": "IPY_MODEL_d3bab427b92144d6b9ce96eac18ceb89", + "value": " 8.68M/8.68M [00:00<00:00, 16.8MB/s]" + } + }, + "d7621b5c619a4ce38ebe63924374cf78": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + 
"object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "1b208b6df75f4a9e97faa4e3705a9442": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "a7871b8ec3ec40e7bbbe6a5f40b79f4a": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "aeb7ee752d834b4cbaa189419fd75dd4": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } }, + "b47dfff73e73410aa89f65e3c5b0c366": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + 
"_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "bdf3571e59ef4a688ab89d4badda27b1": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "d3bab427b92144d6b9ce96eac18ceb89": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + } + } + } + }, + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "m83IiqVREJ96" + }, + "source": [ + "# Chinese Attack" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "6UZ0d84hEJ98" + }, + "source": [ + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/QData/TextAttack/blob/master/docs/2notebook/Example_6_Chinese%20Attack.ipynb)\n", + "\n", + "\n", + "[![View Source on GitHub](https://img.shields.io/badge/github-view%20source-black.svg)](https://github.com/QData/TextAttack/blob/master/docs/2notebook/Example_6_Chinese%20Attack.ipynb)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "tjqc2c5_7YaX" + }, + "source": [ + " Please remember to run the following in your notebook enviroment before running the tutorial codes:\n", + "\n", + "```\n", + "pip3 install textattack[tensorflow]\n", + "```\n", + "\n", + "\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "qZ5xnoevEJ99" + }, + "source": [ + "With a few additional modifications to the standard TextAttack commands, lanaguage models in Chinese can be attacked just as English models. Four transformations are available for either Chinese attack or augmentation:\n", + "\n", + "1. **ChineseHomophoneCharacterSwap**: transforms an input by replacing its words with substitions that share similar/identical pronounciation.\n", + "2. **ChineseMorphonymCharacterSwap**: transforms an input by replacing its words with substitions that share similar glyph structures.\n", + "3. 
**ChineseWordSwapHowNet**: transforms an input by replacing its words with synonyms provided by [OpenHowNet](http://nlp.csai.tsinghua.edu.cn/).\n", + "4. **ChineseWordSwapMaskedLM**: transforms an input with potential replacements using a masked language model." + ] + }, + { + "cell_type": "markdown", + "source": [ + "We begin with imports:" + ], + "metadata": { + "id": "2EP1DJylSfkD" + } + }, + { + "cell_type": "code", + "metadata": { + "id": "5AXyxiLD4X93" + }, + "source": [ + "# Import required packages\n", + "import transformers\n", + "import string\n", + "import os\n", + "import pandas as pd\n", + "import datasets\n", + "\n", + "# Import classes required to build an Attacker\n", + "from textattack.models.wrappers import HuggingFaceModelWrapper\n", + "from textattack.search_methods import GreedyWordSwapWIR\n", + "from textattack.constraints.pre_transformation import (\n", + "    RepeatModification,\n", + "    StopwordModification,\n", + ")\n", + "from textattack.goal_functions import UntargetedClassification\n", + "\n", + "from textattack import Attack, Attacker, AttackArgs\n", + "from textattack.loggers import CSVLogger\n", + "from textattack.datasets import Dataset, HuggingFaceDataset\n", + "\n", + "# Import optional MUSE for higher quality examples\n", + "from textattack.constraints.semantics.sentence_encoders import (\n", + "    MultilingualUniversalSentenceEncoder,\n", + ")\n", + "\n", + "muse = MultilingualUniversalSentenceEncoder(\n", + "    threshold=0.9,\n", + "    metric=\"cosine\",\n", + "    compare_against_original=True,\n", + "    window_size=15,\n", + "    skip_text_shorter_than_window=True,\n", + ")\n", + "\n", + "# Import the transformations\n", + "\n", + "from textattack.transformations import CompositeTransformation\n", + "from textattack.transformations import ChineseWordSwapMaskedLM\n", + "from textattack.transformations import ChineseMorphonymCharacterSwap\n", + "from textattack.transformations import ChineseWordSwapHowNet\n", + "from textattack.transformations import ChineseHomophoneCharacterSwap" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "Models and datasets also need to be set up:" + ], + "metadata": { + "id": "1mSvCqhHSi0h" + } + }, + { + "cell_type": "code", + "source": [ + "# In this example, we will attack a pre-trained classification model from HuggingFace (https://huggingface.co/uer/roberta-base-finetuned-chinanews-chinese)\n", + "tokenizer = transformers.AutoTokenizer.from_pretrained(\n", + "    \"uer/roberta-base-finetuned-chinanews-chinese\"\n", + ")\n", + "model = transformers.AutoModelForSequenceClassification.from_pretrained(\n", + "    \"uer/roberta-base-finetuned-chinanews-chinese\"\n", + ")\n", + "model_wrapper = HuggingFaceModelWrapper(model, tokenizer)\n", + "\n", + "# Set goal function\n", + "goal_function = UntargetedClassification(model_wrapper, query_budget=10000)\n", + "\n", + "# Set dataset from which we will generate adversarial examples\n", + "path = os.path.abspath(\"\")\n", + "path_list = path.split(os.sep)\n", + "temppath = os.path.normpath(\"examples/dataset/zh_sentiment/entailment_dataset.tsv\")\n", + "dataset = datasets.load_dataset(\"csv\", data_files=temppath, delimiter=\"\\t\")[\"train\"]\n", + "dataset = HuggingFaceDataset(\n", + "    dataset,\n", + "    dataset_columns=([\"text\"], \"label\"),\n", + "    label_names=[\n", + "        \"Mainland China politics\",\n", + "        \"Hong Kong - Macau politics\",\n", + "        \"International news\",\n", + "        \"Financial news\",\n", + "        \"Culture\",\n", + "        \"Entertainment\",\n", + 
" \"Sports\",\n", + " ],\n", + ")" + ], + "metadata": { + "id": "CfnC9qUFPq9h" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "If this is your first time running Hownet, run this code block" + ], + "metadata": { + "id": "XfJVzCdRSr3d" + } + }, + { + "cell_type": "code", + "source": [ + "import OpenHowNet\n", + "\n", + "OpenHowNet.download()" + ], + "metadata": { + "id": "Hgal-PHeQwys" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "\n", + "\n", + "Now we are ready to attack! With goal function, transformation, constraints, search method, and goal function, we create the Attacker as any other TextAttack attacks\n" + ], + "metadata": { + "id": "SrtoxdrMSZ0X" + } + }, + { + "cell_type": "code", + "source": [ + "# transformation, using ChineseWordSwapMaskedLM transformation in this example\n", + "\n", + "transformation = ChineseWordSwapMaskedLM()\n", + "\n", + "# constraint\n", + "stopwords = set(\n", + " [\n", + " \"、\",\n", + " \"。\",\n", + " \"〈\",\n", + " \"〉\",\n", + " \"《\",\n", + " \"》\",\n", + " \"一\",\n", + " \"一个\",\n", + " \"一些\",\n", + " \"一何\",\n", + " \"一切\",\n", + " \"一则\",\n", + " \"一方面\",\n", + " \"一旦\",\n", + " \"一来\",\n", + " \"一样\",\n", + " \"一种\",\n", + " \"一般\",\n", + " \"一转眼\",\n", + " \"七\",\n", + " \"万一\",\n", + " \"三\",\n", + " \"上\",\n", + " \"上下\",\n", + " \"下\",\n", + " \"不\",\n", + " \"不仅\",\n", + " \"不但\",\n", + " \"不光\",\n", + " \"不单\",\n", + " \"不只\",\n", + " \"不外乎\",\n", + " \"不如\",\n", + " \"不妨\",\n", + " \"不尽\",\n", + " \"不尽然\",\n", + " \"不得\",\n", + " \"不怕\",\n", + " \"不惟\",\n", + " \"不成\",\n", + " \"不拘\",\n", + " \"不料\",\n", + " \"不是\",\n", + " \"不比\",\n", + " \"不然\",\n", + " \"不特\",\n", + " \"不独\",\n", + " \"不管\",\n", + " \"不至于\",\n", + " \"不若\",\n", + " \"不论\",\n", + " \"不过\",\n", + " \"不问\",\n", + " \"与\",\n", + " \"与其\",\n", + " \"与其说\",\n", + " \"与否\",\n", + " \"与此同时\",\n", + " \"且\",\n", + " \"且不说\",\n", + " \"且说\",\n", + " \"两者\",\n", + " \"个\",\n", + " \"个别\",\n", + " \"中\",\n", + " \"临\",\n", + " \"为\",\n", + " \"为了\",\n", + " \"为什么\",\n", + " \"为何\",\n", + " \"为止\",\n", + " \"为此\",\n", + " \"为着\",\n", + " \"乃\",\n", + " \"乃至\",\n", + " \"乃至于\",\n", + " \"么\",\n", + " \"之\",\n", + " \"之一\",\n", + " \"之所以\",\n", + " \"之类\",\n", + " \"乌乎\",\n", + " \"乎\",\n", + " \"乘\",\n", + " \"九\",\n", + " \"也\",\n", + " \"也好\",\n", + " \"也罢\",\n", + " \"了\",\n", + " \"二\",\n", + " \"二来\",\n", + " \"于\",\n", + " \"于是\",\n", + " \"于是乎\",\n", + " \"云云\",\n", + " \"云尔\",\n", + " \"五\",\n", + " \"些\",\n", + " \"亦\",\n", + " \"人\",\n", + " \"人们\",\n", + " \"人家\",\n", + " \"什\",\n", + " \"什么\",\n", + " \"什么样\",\n", + " \"今\",\n", + " \"介于\",\n", + " \"仍\",\n", + " \"仍旧\",\n", + " \"从\",\n", + " \"从此\",\n", + " \"从而\",\n", + " \"他\",\n", + " \"他人\",\n", + " \"他们\",\n", + " \"他们们\",\n", + " \"以\",\n", + " \"以上\",\n", + " \"以为\",\n", + " \"以便\",\n", + " \"以免\",\n", + " \"以及\",\n", + " \"以故\",\n", + " \"以期\",\n", + " \"以来\",\n", + " \"以至\",\n", + " \"以至于\",\n", + " \"以致\",\n", + " \"们\",\n", + " \"任\",\n", + " \"任何\",\n", + " \"任凭\",\n", + " \"会\",\n", + " \"似的\",\n", + " \"但\",\n", + " \"但凡\",\n", + " \"但是\",\n", + " \"何\",\n", + " \"何以\",\n", + " \"何况\",\n", + " \"何处\",\n", + " \"何时\",\n", + " \"余外\",\n", + " \"作为\",\n", + " \"你\",\n", + " \"你们\",\n", + " \"使\",\n", + " \"使得\",\n", + " \"例如\",\n", + " \"依\",\n", + " \"依据\",\n", + " \"依照\",\n", + " \"便于\",\n", + " \"俺\",\n", + " \"俺们\",\n", + " \"倘\",\n", + " \"倘使\",\n", + " \"倘或\",\n", + " \"倘然\",\n", + " \"倘若\",\n", 
+ " \"借\",\n", + " \"借傥然\",\n", + " \"假使\",\n", + " \"假如\",\n", + " \"假若\",\n", + " \"做\",\n", + " \"像\",\n", + " \"儿\",\n", + " \"先不先\",\n", + " \"光\",\n", + " \"光是\",\n", + " \"全体\",\n", + " \"全部\",\n", + " \"八\",\n", + " \"六\",\n", + " \"兮\",\n", + " \"共\",\n", + " \"关于\",\n", + " \"关于具体地说\",\n", + " \"其\",\n", + " \"其一\",\n", + " \"其中\",\n", + " \"其二\",\n", + " \"其他\",\n", + " \"其余\",\n", + " \"其它\",\n", + " \"其次\",\n", + " \"具体地说\",\n", + " \"具体说来\",\n", + " \"兼之\",\n", + " \"内\",\n", + " \"再\",\n", + " \"再其次\",\n", + " \"再则\",\n", + " \"再有\",\n", + " \"再者\",\n", + " \"再者说\",\n", + " \"再说\",\n", + " \"冒\",\n", + " \"冲\",\n", + " \"况且\",\n", + " \"几\",\n", + " \"几时\",\n", + " \"凡\",\n", + " \"凡是\",\n", + " \"凭\",\n", + " \"凭借\",\n", + " \"出于\",\n", + " \"出来\",\n", + " \"分\",\n", + " \"分别\",\n", + " \"则\",\n", + " \"则甚\",\n", + " \"别\",\n", + " \"别人\",\n", + " \"别处\",\n", + " \"别是\",\n", + " \"别的\",\n", + " \"别管\",\n", + " \"别说\",\n", + " \"到\",\n", + " \"前后\",\n", + " \"前此\",\n", + " \"前者\",\n", + " \"加之\",\n", + " \"加以\",\n", + " \"区\",\n", + " \"即\",\n", + " \"即令\",\n", + " \"即使\",\n", + " \"即便\",\n", + " \"即如\",\n", + " \"即或\",\n", + " \"即若\",\n", + " \"却\",\n", + " \"去\",\n", + " \"又\",\n", + " \"又及\",\n", + " \"及\",\n", + " \"及其\",\n", + " \"及至\",\n", + " \"反之\",\n", + " \"反而\",\n", + " \"反过来\",\n", + " \"反过来说\",\n", + " \"受到\",\n", + " \"另\",\n", + " \"另一方面\",\n", + " \"另外\",\n", + " \"另悉\",\n", + " \"只\",\n", + " \"只当\",\n", + " \"只怕\",\n", + " \"只是\",\n", + " \"只有\",\n", + " \"只消\",\n", + " \"只要\",\n", + " \"只限\",\n", + " \"叫\",\n", + " \"叮咚\",\n", + " \"可\",\n", + " \"可以\",\n", + " \"可是\",\n", + " \"可见\",\n", + " \"各\",\n", + " \"各个\",\n", + " \"各位\",\n", + " \"各种\",\n", + " \"各自\",\n", + " \"同\",\n", + " \"同时\",\n", + " \"后\",\n", + " \"后者\",\n", + " \"向\",\n", + " \"向使\",\n", + " \"向着\",\n", + " \"吓\",\n", + " \"吗\",\n", + " \"否则\",\n", + " \"吧\",\n", + " \"吧哒\",\n", + " \"含\",\n", + " \"吱\",\n", + " \"呀\",\n", + " \"呃\",\n", + " \"呕\",\n", + " \"呗\",\n", + " \"呜\",\n", + " \"呜呼\",\n", + " \"呢\",\n", + " \"呵\",\n", + " \"呵呵\",\n", + " \"呸\",\n", + " \"呼哧\",\n", + " \"咋\",\n", + " \"和\",\n", + " \"咚\",\n", + " \"咦\",\n", + " \"咧\",\n", + " \"咱\",\n", + " \"咱们\",\n", + " \"咳\",\n", + " \"哇\",\n", + " \"哈\",\n", + " \"哈哈\",\n", + " \"哉\",\n", + " \"哎\",\n", + " \"哎呀\",\n", + " \"哎哟\",\n", + " \"哗\",\n", + " \"哟\",\n", + " \"哦\",\n", + " \"哩\",\n", + " \"哪\",\n", + " \"哪个\",\n", + " \"哪些\",\n", + " \"哪儿\",\n", + " \"哪天\",\n", + " \"哪年\",\n", + " \"哪怕\",\n", + " \"哪样\",\n", + " \"哪边\",\n", + " \"哪里\",\n", + " \"哼\",\n", + " \"哼唷\",\n", + " \"唉\",\n", + " \"唯有\",\n", + " \"啊\",\n", + " \"啐\",\n", + " \"啥\",\n", + " \"啦\",\n", + " \"啪达\",\n", + " \"啷当\",\n", + " \"喂\",\n", + " \"喏\",\n", + " \"喔唷\",\n", + " \"喽\",\n", + " \"嗡\",\n", + " \"嗡嗡\",\n", + " \"嗬\",\n", + " \"嗯\",\n", + " \"嗳\",\n", + " \"嘎\",\n", + " \"嘎登\",\n", + " \"嘘\",\n", + " \"嘛\",\n", + " \"嘻\",\n", + " \"嘿\",\n", + " \"嘿嘿\",\n", + " \"四\",\n", + " \"因\",\n", + " \"因为\",\n", + " \"因了\",\n", + " \"因此\",\n", + " \"因着\",\n", + " \"因而\",\n", + " \"固然\",\n", + " \"在\",\n", + " \"在下\",\n", + " \"在于\",\n", + " \"地\",\n", + " \"基于\",\n", + " \"处在\",\n", + " \"多\",\n", + " \"多么\",\n", + " \"多少\",\n", + " \"大\",\n", + " \"大家\",\n", + " \"她\",\n", + " \"她们\",\n", + " \"好\",\n", + " \"如\",\n", + " \"如上\",\n", + " \"如上所述\",\n", + " \"如下\",\n", + " \"如何\",\n", + " \"如其\",\n", + " \"如同\",\n", + " \"如是\",\n", + " \"如果\",\n", + " \"如此\",\n", + " \"如若\",\n", + " \"始而\",\n", + " \"孰料\",\n", + " \"孰知\",\n", + " \"宁\",\n", + " 
\"宁可\",\n", + " \"宁愿\",\n", + " \"宁肯\",\n", + " \"它\",\n", + " \"它们\",\n", + " \"对\",\n", + " \"对于\",\n", + " \"对待\",\n", + " \"对方\",\n", + " \"对比\",\n", + " \"将\",\n", + " \"小\",\n", + " \"尔\",\n", + " \"尔后\",\n", + " \"尔尔\",\n", + " \"尚且\",\n", + " \"就\",\n", + " \"就是\",\n", + " \"就是了\",\n", + " \"就是说\",\n", + " \"就算\",\n", + " \"就要\",\n", + " \"尽\",\n", + " \"尽管\",\n", + " \"尽管如此\",\n", + " \"岂但\",\n", + " \"己\",\n", + " \"已\",\n", + " \"已矣\",\n", + " \"巴\",\n", + " \"巴巴\",\n", + " \"年\",\n", + " \"并\",\n", + " \"并且\",\n", + " \"庶乎\",\n", + " \"庶几\",\n", + " \"开外\",\n", + " \"开始\",\n", + " \"归\",\n", + " \"归齐\",\n", + " \"当\",\n", + " \"当地\",\n", + " \"当然\",\n", + " \"当着\",\n", + " \"彼\",\n", + " \"彼时\",\n", + " \"彼此\",\n", + " \"往\",\n", + " \"待\",\n", + " \"很\",\n", + " \"得\",\n", + " \"得了\",\n", + " \"怎\",\n", + " \"怎么\",\n", + " \"怎么办\",\n", + " \"怎么样\",\n", + " \"怎奈\",\n", + " \"怎样\",\n", + " \"总之\",\n", + " \"总的来看\",\n", + " \"总的来说\",\n", + " \"总的说来\",\n", + " \"总而言之\",\n", + " \"恰恰相反\",\n", + " \"您\",\n", + " \"惟其\",\n", + " \"慢说\",\n", + " \"我\",\n", + " \"我们\",\n", + " \"或\",\n", + " \"或则\",\n", + " \"或是\",\n", + " \"或曰\",\n", + " \"或者\",\n", + " \"截至\",\n", + " \"所\",\n", + " \"所以\",\n", + " \"所在\",\n", + " \"所幸\",\n", + " \"所有\",\n", + " \"才\",\n", + " \"才能\",\n", + " \"打\",\n", + " \"打从\",\n", + " \"把\",\n", + " \"抑或\",\n", + " \"拿\",\n", + " \"按\",\n", + " \"按照\",\n", + " \"换句话说\",\n", + " \"换言之\",\n", + " \"据\",\n", + " \"据此\",\n", + " \"接着\",\n", + " \"故\",\n", + " \"故此\",\n", + " \"故而\",\n", + " \"旁人\",\n", + " \"无\",\n", + " \"无宁\",\n", + " \"无论\",\n", + " \"既\",\n", + " \"既往\",\n", + " \"既是\",\n", + " \"既然\",\n", + " \"日\",\n", + " \"时\",\n", + " \"时候\",\n", + " \"是\",\n", + " \"是以\",\n", + " \"是的\",\n", + " \"更\",\n", + " \"曾\",\n", + " \"替\",\n", + " \"替代\",\n", + " \"最\",\n", + " \"月\",\n", + " \"有\",\n", + " \"有些\",\n", + " \"有关\",\n", + " \"有及\",\n", + " \"有时\",\n", + " \"有的\",\n", + " \"望\",\n", + " \"朝\",\n", + " \"朝着\",\n", + " \"本\",\n", + " \"本人\",\n", + " \"本地\",\n", + " \"本着\",\n", + " \"本身\",\n", + " \"来\",\n", + " \"来着\",\n", + " \"来自\",\n", + " \"来说\",\n", + " \"极了\",\n", + " \"果然\",\n", + " \"果真\",\n", + " \"某\",\n", + " \"某个\",\n", + " \"某些\",\n", + " \"某某\",\n", + " \"根据\",\n", + " \"欤\",\n", + " \"正值\",\n", + " \"正如\",\n", + " \"正巧\",\n", + " \"正是\",\n", + " \"此\",\n", + " \"此地\",\n", + " \"此处\",\n", + " \"此外\",\n", + " \"此时\",\n", + " \"此次\",\n", + " \"此间\",\n", + " \"毋宁\",\n", + " \"每\",\n", + " \"每当\",\n", + " \"比\",\n", + " \"比及\",\n", + " \"比如\",\n", + " \"比方\",\n", + " \"没奈何\",\n", + " \"沿\",\n", + " \"沿着\",\n", + " \"漫说\",\n", + " \"点\",\n", + " \"焉\",\n", + " \"然则\",\n", + " \"然后\",\n", + " \"然而\",\n", + " \"照\",\n", + " \"照着\",\n", + " \"犹且\",\n", + " \"犹自\",\n", + " \"甚且\",\n", + " \"甚么\",\n", + " \"甚或\",\n", + " \"甚而\",\n", + " \"甚至\",\n", + " \"甚至于\",\n", + " \"用\",\n", + " \"用来\",\n", + " \"由\",\n", + " \"由于\",\n", + " \"由是\",\n", + " \"由此\",\n", + " \"由此可见\",\n", + " \"的\",\n", + " \"的确\",\n", + " \"的话\",\n", + " \"直到\",\n", + " \"相对而言\",\n", + " \"省得\",\n", + " \"看\",\n", + " \"眨眼\",\n", + " \"着\",\n", + " \"着呢\",\n", + " \"矣\",\n", + " \"矣乎\",\n", + " \"矣哉\",\n", + " \"离\",\n", + " \"秒\",\n", + " \"称\",\n", + " \"竟而\",\n", + " \"第\",\n", + " \"等\",\n", + " \"等到\",\n", + " \"等等\",\n", + " \"简言之\",\n", + " \"管\",\n", + " \"类如\",\n", + " \"紧接着\",\n", + " \"纵\",\n", + " \"纵令\",\n", + " \"纵使\",\n", + " \"纵然\",\n", + " \"经\",\n", + " \"经过\",\n", + " \"结果\",\n", + " \"给\",\n", + " \"继之\",\n", + " \"继后\",\n", + " \"继而\",\n", + " \"综上所述\",\n", 
+ " \"罢了\",\n", + " \"者\",\n", + " \"而\",\n", + " \"而且\",\n", + " \"而况\",\n", + " \"而后\",\n", + " \"而外\",\n", + " \"而已\",\n", + " \"而是\",\n", + " \"而言\",\n", + " \"能\",\n", + " \"能否\",\n", + " \"腾\",\n", + " \"自\",\n", + " \"自个儿\",\n", + " \"自从\",\n", + " \"自各儿\",\n", + " \"自后\",\n", + " \"自家\",\n", + " \"自己\",\n", + " \"自打\",\n", + " \"自身\",\n", + " \"至\",\n", + " \"至于\",\n", + " \"至今\",\n", + " \"至若\",\n", + " \"致\",\n", + " \"般的\",\n", + " \"若\",\n", + " \"若夫\",\n", + " \"若是\",\n", + " \"若果\",\n", + " \"若非\",\n", + " \"莫不然\",\n", + " \"莫如\",\n", + " \"莫若\",\n", + " \"虽\",\n", + " \"虽则\",\n", + " \"虽然\",\n", + " \"虽说\",\n", + " \"被\",\n", + " \"要\",\n", + " \"要不\",\n", + " \"要不是\",\n", + " \"要不然\",\n", + " \"要么\",\n", + " \"要是\",\n", + " \"譬喻\",\n", + " \"譬如\",\n", + " \"让\",\n", + " \"许多\",\n", + " \"论\",\n", + " \"设使\",\n", + " \"设或\",\n", + " \"设若\",\n", + " \"诚如\",\n", + " \"诚然\",\n", + " \"该\",\n", + " \"说\",\n", + " \"说来\",\n", + " \"请\",\n", + " \"诸\",\n", + " \"诸位\",\n", + " \"诸如\",\n", + " \"谁\",\n", + " \"谁人\",\n", + " \"谁料\",\n", + " \"谁知\",\n", + " \"贼死\",\n", + " \"赖以\",\n", + " \"赶\",\n", + " \"起\",\n", + " \"起见\",\n", + " \"趁\",\n", + " \"趁着\",\n", + " \"越是\",\n", + " \"距\",\n", + " \"跟\",\n", + " \"较\",\n", + " \"较之\",\n", + " \"边\",\n", + " \"过\",\n", + " \"还\",\n", + " \"还是\",\n", + " \"还有\",\n", + " \"还要\",\n", + " \"这\",\n", + " \"这一来\",\n", + " \"这个\",\n", + " \"这么\",\n", + " \"这么些\",\n", + " \"这么样\",\n", + " \"这么点儿\",\n", + " \"这些\",\n", + " \"这会儿\",\n", + " \"这儿\",\n", + " \"这就是说\",\n", + " \"这时\",\n", + " \"这样\",\n", + " \"这次\",\n", + " \"这般\",\n", + " \"这边\",\n", + " \"这里\",\n", + " \"进而\",\n", + " \"连\",\n", + " \"连同\",\n", + " \"逐步\",\n", + " \"通过\",\n", + " \"遵循\",\n", + " \"遵照\",\n", + " \"那\",\n", + " \"那个\",\n", + " \"那么\",\n", + " \"那么些\",\n", + " \"那么样\",\n", + " \"那些\",\n", + " \"那会儿\",\n", + " \"那儿\",\n", + " \"那时\",\n", + " \"那样\",\n", + " \"那般\",\n", + " \"那边\",\n", + " \"那里\",\n", + " \"都\",\n", + " \"鄙人\",\n", + " \"鉴于\",\n", + " \"针对\",\n", + " \"阿\",\n", + " \"除\",\n", + " \"除了\",\n", + " \"除外\",\n", + " \"除开\",\n", + " \"除此之外\",\n", + " \"除非\",\n", + " \"随\",\n", + " \"随后\",\n", + " \"随时\",\n", + " \"随着\",\n", + " \"难道说\",\n", + " \"零\",\n", + " \"非\",\n", + " \"非但\",\n", + " \"非徒\",\n", + " \"非特\",\n", + " \"非独\",\n", + " \"靠\",\n", + " \"顺\",\n", + " \"顺着\",\n", + " \"首先\",\n", + " \"︿\",\n", + " \"!\",\n", + " \"#\",\n", + " \"$\",\n", + " \"%\",\n", + " \"&\",\n", + " \"(\",\n", + " \")\",\n", + " \"*\",\n", + " \"+\",\n", + " \",\",\n", + " \"0\",\n", + " \"1\",\n", + " \"2\",\n", + " \"3\",\n", + " \"4\",\n", + " \"5\",\n", + " \"6\",\n", + " \"7\",\n", + " \"8\",\n", + " \"9\",\n", + " \":\",\n", + " \";\",\n", + " \"<\",\n", + " \">\",\n", + " \"?\",\n", + " \"@\",\n", + " \"[\",\n", + " \"]\",\n", + " \"{\",\n", + " \"|\",\n", + " \"}\",\n", + " \"~\",\n", + " \"¥\",\n", + " ]\n", + ")\n", + "stopwords = stopwords.union(set(string.punctuation))\n", + "constraints = [RepeatModification(), StopwordModification(stopwords=stopwords)]\n", + "\n", + "# search method\n", + "search_method = GreedyWordSwapWIR(wir_method=\"weighted-saliency\")\n", + "\n", + "# attack!\n", + "attack = Attack(goal_function, constraints, transformation, search_method)\n", + "attack_args = AttackArgs(num_examples=20)\n", + "attacker = Attacker(attack, dataset, attack_args)\n", + "attack_results = attacker.attack_dataset()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000, + "referenced_widgets": [ + 
"4b423038915e40158f9da4c07d09aad3", + "3711cf0a18994cee8fc840d9a93cf5d3", + "7f77bd7b8e5f45ae94cfc45f915c0c72", + "fe0ca6138bc54b628c03e590c6e96aed", + "8b39363f69eb46009c5357263a65248c", + "6b976fd913584da69456c1b6d53483cb", + "ea568ab2407f474da3b1f1b2540fa3a8", + "ff6b34a7e75b443593f3dca5d050cd52", + "4f31972fd2fd44bbac063bb4b5075e98", + "7de1551891ec447ab6d80ea1de145f16", + "e5e2c0507c834887b80f5717c1e6d5f3", + "588b1321a9274de6a8a9e86622d90be4", + "2436b07259a34ee18fe9c1007f7b615b", + "98aac5a0baee4930bd461f2c5fd73f4a", + "34607a8556794a5a86c18abe5bd7e5a5", + "f78f6701ce4f4b3b9ff0af925620f261", + "a1e3fb5cceed4e95957a17192a641b69", + "83e9b14c4d354fdc80db4f8a881f19f3", + "5f5457f292284dd8b914f45e26b2f749", + "2bb72191846f49528663680a315d8b01", + "83eff532314e4edcbfe648b321e9a310", + "3d30e700d32443fdb37b5ab934d2d70a", + "a132f09845a54cbe865cbe8159bb693e", + "0af0e1eaea2f48c5b0fec6e550bd1baa", + "dd6b0a5d9db245338a8fdb2ef5b29bf9", + "58fc309041b54e94ae265167fa20d8d7", + "89dfd3fdc41e417a870901bc79e47495", + "21472d1c4c8b494a8d3660b3320e9d4b", + "7511bb9ca5424674bb2350dff63c468a", + "f6dd2c2cb4e346fe9af7026b5d2162e9", + "a34ad57624fc422aa4832db3963298e6", + "5167daffe92e44d2acc2af2d9b9738df", + "acbfb34a353f41649675bd104069d14e", + "be070cb4a1624b0bb8f9b594c6b951a5", + "2edb7130713d4e10a07bbf808abb9771", + "5ae4c618f75d4ef9b65e5020fccb6d72", + "138d8260e67f4bc58106b9b42f7abd12", + "d7621b5c619a4ce38ebe63924374cf78", + "1b208b6df75f4a9e97faa4e3705a9442", + "a7871b8ec3ec40e7bbbe6a5f40b79f4a", + "aeb7ee752d834b4cbaa189419fd75dd4", + "b47dfff73e73410aa89f65e3c5b0c366", + "bdf3571e59ef4a688ab89d4badda27b1", + "d3bab427b92144d6b9ce96eac18ceb89" + ] + }, + "id": "C_0Z8njnRblT", + "outputId": "3890d784-de7f-4b70-f984-cbc9e0c7f700" + }, + "execution_count": null, + "outputs": [ { - "cell_type": "code", - "source": [ - "# In this example, we will attack a pre-trained entailment model from HugginFace (https://huggingface.co/uer/roberta-base-finetuned-chinanews-chinese)\n", - "tokenizer = transformers.AutoTokenizer.from_pretrained('uer/roberta-base-finetuned-chinanews-chinese')\n", - "model = transformers.AutoModelForSequenceClassification.from_pretrained('uer/roberta-base-finetuned-chinanews-chinese')\n", - "model_wrapper = HuggingFaceModelWrapper(model, tokenizer)\n", - "\n", - "# Set goal function\n", - "goal_function = UntargetedClassification(model_wrapper, query_budget=10000)\n", - "\n", - "# Set dataset from which we will generate adversraial examples\n", - "path = os.path.abspath('')\n", - "path_list = path.split(os.sep)\n", - "temppath = os.path.normpath('examples/dataset/zh_sentiment/entailment_dataset.tsv')\n", - "dataset = datasets.load_dataset('csv', data_files=temppath, delimiter=\"\\t\")[\"train\"]\n", - "dataset = HuggingFaceDataset(\n", - " dataset,\n", - " dataset_columns=([\"text\"], \"label\"),\n", - " label_names=[\"Mainland China politics\", \"Hong Kong - Macau politics\", \"International news\", \"Financial news\", \"Culture\", \"Entertainment\", \"Sports\"]\n", - " )" + "output_type": "display_data", + "data": { + "text/plain": [ + "Downloading: 0%| | 0.00/615 [00:00 [[[FAILED]]]\n", - "\n", - "林书豪新秀赛上甘心\"跑龙套\" 自称仍是底薪球员\n", - "\n", - "\n" - ] - }, - { - "output_type": "stream", - "name": "stderr", - "text": [ - "\n", - "[Succeeded / Failed / Skipped / Total] 0 / 1 / 0 / 1: 10%|█ | 2/20 [06:55<1:02:18, 207.69s/it]\u001b[A\n", - "[Succeeded / Failed / Skipped / Total] 0 / 2 / 0 / 2: 10%|█ | 2/20 [06:55<1:02:18, 207.70s/it]\u001b[A" - ] - }, - { - "output_type": "stream", - 
"name": "stdout", - "text": [ - "--------------------------------------------- Result 2 ---------------------------------------------\n", - "[[Culture (100%)]] --> [[[FAILED]]]\n", - "\n", - "成都现“真人图书馆”:无书“借人”给你读\n", - "\n", - "\n" - ] - }, - { - "output_type": "stream", - "name": "stderr", - "text": [ - "\n", - "[Succeeded / Failed / Skipped / Total] 0 / 2 / 0 / 2: 15%|█▌ | 3/20 [07:01<39:50, 140.61s/it] \u001b[A\n", - "[Succeeded / Failed / Skipped / Total] 0 / 2 / 1 / 3: 15%|█▌ | 3/20 [07:01<39:50, 140.61s/it]\u001b[A" - ] - }, - { - "output_type": "stream", - "name": "stdout", - "text": [ - "--------------------------------------------- Result 3 ---------------------------------------------\n", - "[[Mainland china politics (57%)]] --> [[[SKIPPED]]]\n", - "\n", - "中国经济走向更趋稳健务实\n", - "\n", - "\n" - ] - }, - { - "output_type": "stream", - "name": "stderr", - "text": [ - "\n", - "[Succeeded / Failed / Skipped / Total] 0 / 2 / 1 / 3: 20%|██ | 4/20 [11:33<46:12, 173.28s/it]\u001b[A\n", - "[Succeeded / Failed / Skipped / Total] 0 / 3 / 1 / 4: 20%|██ | 4/20 [11:33<46:12, 173.28s/it]\u001b[A" - ] - }, - { - "output_type": "stream", - "name": "stdout", - "text": [ - "--------------------------------------------- Result 4 ---------------------------------------------\n", - "[[Sports (100%)]] --> [[[FAILED]]]\n", - "\n", - "国际田联世界挑战赛 罗伯斯迎来赛季第三冠\n", - "\n", - "\n" - ] - }, - { - "output_type": "stream", - "name": "stderr", - "text": [ - "\n", - "[Succeeded / Failed / Skipped / Total] 0 / 3 / 1 / 4: 25%|██▌ | 5/20 [14:52<44:36, 178.44s/it]\u001b[A" - ] - }, - { - "output_type": "stream", - "name": "stdout", - "text": [ - "--------------------------------------------- Result 5 ---------------------------------------------\n" - ] - }, - { - "output_type": "stream", - "name": "stderr", - "text": [ - "\n", - "[Succeeded / Failed / Skipped / Total] 1 / 3 / 1 / 5: 25%|██▌ | 5/20 [14:53<44:39, 178.62s/it]\u001b[A" - ] - }, - { - "output_type": "stream", - "name": "stdout", - "text": [ - "[[International news (66%)]] --> [[Entertainment (68%)]]\n", - "\n", - "德国一电视台合成“默克尔头巾照”惹争议\n", - "\n", - "德国一电视台合成“性感头巾照”惹争议\n", - "\n", - "\n" - ] - }, - { - "output_type": "stream", - "name": "stderr", - "text": [ - "\n", - "[Succeeded / Failed / Skipped / Total] 1 / 3 / 1 / 5: 30%|███ | 6/20 [14:57<34:55, 149.65s/it]\u001b[A\n", - "[Succeeded / Failed / Skipped / Total] 1 / 3 / 2 / 6: 30%|███ | 6/20 [14:57<34:55, 149.65s/it]\u001b[A" - ] - }, - { - "output_type": "stream", - "name": "stdout", - "text": [ - "--------------------------------------------- Result 6 ---------------------------------------------\n", - "[[Mainland china politics (80%)]] --> [[[SKIPPED]]]\n", - "\n", - "朴槿惠今访华 韩媒称访西安可能为增进与习近平友谊\n", - "\n", - "\n" - ] - }, - { - "output_type": "stream", - "name": "stderr", - "text": [ - "\n", - "[Succeeded / Failed / Skipped / Total] 1 / 3 / 2 / 6: 35%|███▌ | 7/20 [15:04<27:59, 129.16s/it]\u001b[A\n", - "[Succeeded / Failed / Skipped / Total] 1 / 3 / 3 / 7: 35%|███▌ | 7/20 [15:04<27:59, 129.16s/it]\u001b[A" - ] - }, - { - "output_type": "stream", - "name": "stdout", - "text": [ - "--------------------------------------------- Result 7 ---------------------------------------------\n", - "[[Mainland china politics (59%)]] --> [[[SKIPPED]]]\n", - "\n", - "中国驻休斯敦总领馆举办春节招待会向华裔拜年\n", - "\n", - "\n" - ] - }, - { - "output_type": "stream", - "name": "stderr", - "text": [ - "\n", - "[Succeeded / Failed / Skipped / Total] 1 / 3 / 3 / 7: 40%|████ | 8/20 [15:08<22:43, 113.60s/it]\u001b[A\n", - "[Succeeded / Failed / 
Skipped / Total] 1 / 3 / 4 / 8: 40%|████ | 8/20 [15:08<22:43, 113.61s/it]\u001b[A" - ] - }, - { - "output_type": "stream", - "name": "stdout", - "text": [ - "--------------------------------------------- Result 8 ---------------------------------------------\n", - "[[Culture (93%)]] --> [[[SKIPPED]]]\n", - "\n", - "NASA发现“地球兄弟” 具备生命存活条件\n", - "\n", - "\n" - ] - }, - { - "output_type": "stream", - "name": "stderr", - "text": [ - "\n", - "[Succeeded / Failed / Skipped / Total] 1 / 3 / 4 / 8: 45%|████▌ | 9/20 [15:13<18:36, 101.52s/it]\u001b[A\n", - "[Succeeded / Failed / Skipped / Total] 1 / 3 / 5 / 9: 45%|████▌ | 9/20 [15:13<18:36, 101.52s/it]\u001b[A" - ] - }, - { - "output_type": "stream", - "name": "stdout", - "text": [ - "--------------------------------------------- Result 9 ---------------------------------------------\n", - "[[Culture (53%)]] --> [[[SKIPPED]]]\n", - "\n", - "儿子去世后社交网站账号停用 父亲请求保留记忆\n", - "\n", - "\n" - ] - }, - { - "output_type": "stream", - "name": "stderr", - "text": [ - "\n", - "[Succeeded / Failed / Skipped / Total] 1 / 3 / 5 / 9: 50%|█████ | 10/20 [18:20<18:20, 110.06s/it]\u001b[A\n", - "[Succeeded / Failed / Skipped / Total] 2 / 3 / 5 / 10: 50%|█████ | 10/20 [18:20<18:20, 110.06s/it]\u001b[A" - ] - }, - { - "output_type": "stream", - "name": "stdout", - "text": [ - "--------------------------------------------- Result 10 ---------------------------------------------\n", - "[[Culture (100%)]] --> [[Entertainment (72%)]]\n", - "\n", - "第六届鲁迅文学奖颁发 格非等35位获奖者领奖\n", - "\n", - "第六届决赛颁发 格非等35位获奖者领奖\n", - "\n", - "\n" - ] - }, - { - "output_type": "stream", - "name": "stderr", - "text": [ - "\n", - "[Succeeded / Failed / Skipped / Total] 2 / 3 / 5 / 10: 55%|█████▌ | 11/20 [22:44<18:36, 124.02s/it]\u001b[A\n", - "[Succeeded / Failed / Skipped / Total] 3 / 3 / 5 / 11: 55%|█████▌ | 11/20 [22:44<18:36, 124.02s/it]\u001b[A" - ] - }, - { - "output_type": "stream", - "name": "stdout", - "text": [ - "--------------------------------------------- Result 11 ---------------------------------------------\n", - "[[Hong kong - macau politics (96%)]] --> [[Culture (79%)]]\n", - "\n", - "东莞台商欲借“台博会”搭建内销平台\n", - "\n", - "东莞讯欲借“艺博会”搭建内销平台\n", - "\n", - "\n" - ] - }, - { - "output_type": "stream", - "name": "stderr", - "text": [ - "\n", - "[Succeeded / Failed / Skipped / Total] 3 / 3 / 5 / 11: 60%|██████ | 12/20 [22:48<15:12, 114.07s/it]\u001b[A\n", - "[Succeeded / Failed / Skipped / Total] 3 / 3 / 6 / 12: 60%|██████ | 12/20 [22:48<15:12, 114.07s/it]\u001b[A" - ] - }, - { - "output_type": "stream", - "name": "stdout", - "text": [ - "--------------------------------------------- Result 12 ---------------------------------------------\n", - "[[Financial news (56%)]] --> [[[SKIPPED]]]\n", - "\n", - "日本网友买扇贝当下酒菜 发现内有真正珍珠(图)\n", - "\n", - "\n" - ] - }, - { - "output_type": "stream", - "name": "stderr", - "text": [ - "\n", - "[Succeeded / Failed / Skipped / Total] 3 / 3 / 6 / 12: 65%|██████▌ | 13/20 [28:59<15:36, 133.78s/it]\u001b[A\n", - "[Succeeded / Failed / Skipped / Total] 3 / 4 / 6 / 13: 65%|██████▌ | 13/20 [28:59<15:36, 133.78s/it]\u001b[A" - ] - }, - { - "output_type": "stream", - "name": "stdout", - "text": [ - "--------------------------------------------- Result 13 ---------------------------------------------\n", - "[[Sports (100%)]] --> [[[FAILED]]]\n", - "\n", - "篮球热潮席卷张江 NBA中投王与拉拉队鼎力加盟\n", - "\n", - "\n" - ] - }, - { - "output_type": "stream", - "name": "stderr", - "text": [ - "\n", - "[Succeeded / Failed / Skipped / Total] 3 / 4 / 6 / 13: 70%|███████ | 14/20 [33:40<14:26, 
144.34s/it]\u001b[A\n", - "[Succeeded / Failed / Skipped / Total] 3 / 5 / 6 / 14: 70%|███████ | 14/20 [33:40<14:26, 144.34s/it]\u001b[A" - ] - }, - { - "output_type": "stream", - "name": "stdout", - "text": [ - "--------------------------------------------- Result 14 ---------------------------------------------\n", - "[[Sports (100%)]] --> [[[FAILED]]]\n", - "\n", - "UFC终极格斗冠军赛开打 \"草原狼\"遭遇三连败\n", - "\n", - "\n" - ] - }, - { - "output_type": "stream", - "name": "stderr", - "text": [ - "\n", - "[Succeeded / Failed / Skipped / Total] 3 / 5 / 6 / 14: 75%|███████▌ | 15/20 [33:45<11:15, 135.04s/it]\u001b[A\n", - "[Succeeded / Failed / Skipped / Total] 3 / 5 / 7 / 15: 75%|███████▌ | 15/20 [33:45<11:15, 135.04s/it]\u001b[A" - ] - }, - { - "output_type": "stream", - "name": "stdout", - "text": [ - "--------------------------------------------- Result 15 ---------------------------------------------\n", - "[[Culture (92%)]] --> [[[SKIPPED]]]\n", - "\n", - "水果style:心形水果惹人爱 骰子西瓜乐趣多(图)\n", - "\n", - "\n" - ] - }, - { - "output_type": "stream", - "name": "stderr", - "text": [ - "\n", - "[Succeeded / Failed / Skipped / Total] 3 / 5 / 7 / 15: 80%|████████ | 16/20 [40:09<10:02, 150.60s/it]\u001b[A\n", - "[Succeeded / Failed / Skipped / Total] 3 / 6 / 7 / 16: 80%|████████ | 16/20 [40:09<10:02, 150.60s/it]\u001b[A" - ] - }, - { - "output_type": "stream", - "name": "stdout", - "text": [ - "--------------------------------------------- Result 16 ---------------------------------------------\n", - "[[Sports (100%)]] --> [[[FAILED]]]\n", - "\n", - "同里杯中国天元赛前瞻:芈昱廷李钦诚争挑战权\n", - "\n", - "\n" - ] - }, - { - "output_type": "stream", - "name": "stderr", - "text": [ - "\n", - "[Succeeded / Failed / Skipped / Total] 3 / 6 / 7 / 16: 85%|████████▌ | 17/20 [43:32<07:41, 153.67s/it]\u001b[A\n", - "[Succeeded / Failed / Skipped / Total] 4 / 6 / 7 / 17: 85%|████████▌ | 17/20 [43:32<07:41, 153.67s/it]\u001b[A" - ] - }, - { - "output_type": "stream", - "name": "stdout", - "text": [ - "--------------------------------------------- Result 17 ---------------------------------------------\n", - "[[Entertainment (100%)]] --> [[Financial news (99%)]]\n", - "\n", - "桂纶镁为戏体验生活 东北洗衣店当店员\n", - "\n", - "桂纶品牌为首体验生活 东北洗衣店当家\n", - "\n", - "\n" - ] - }, - { - "output_type": "stream", - "name": "stderr", - "text": [ - "\n", - "[Succeeded / Failed / Skipped / Total] 4 / 6 / 7 / 17: 90%|█████████ | 18/20 [44:01<04:53, 146.75s/it]\u001b[A\n", - "[Succeeded / Failed / Skipped / Total] 4 / 7 / 7 / 18: 90%|█████████ | 18/20 [44:01<04:53, 146.75s/it]\u001b[A" - ] - }, - { - "output_type": "stream", - "name": "stdout", - "text": [ - "--------------------------------------------- Result 18 ---------------------------------------------\n", - "[[Culture (95%)]] --> [[[FAILED]]]\n", - "\n", - "河南羲皇故都朝祖会流传6000年 一天游客80万人\n", - "\n", - "\n" - ] - }, - { - "output_type": "stream", - "name": "stderr", - "text": [ - "\n", - "[Succeeded / Failed / Skipped / Total] 4 / 7 / 7 / 18: 95%|█████████▌| 19/20 [44:07<02:19, 139.35s/it]\u001b[A\n", - "[Succeeded / Failed / Skipped / Total] 4 / 7 / 8 / 19: 95%|█████████▌| 19/20 [44:07<02:19, 139.35s/it]\u001b[A" - ] - }, - { - "output_type": "stream", - "name": "stdout", - "text": [ - "--------------------------------------------- Result 19 ---------------------------------------------\n", - "[[Culture (92%)]] --> [[[SKIPPED]]]\n", - "\n", - "辛柏青谈追求妻子:用1袋洗衣粉、2块肥皂打动她的\n", - "\n", - "\n" - ] - }, - { - "output_type": "stream", - "name": "stderr", - "text": [ - "\n", - "[Succeeded / Failed / Skipped / Total] 4 / 7 / 8 / 19: 
100%|██████████| 20/20 [49:19<00:00, 147.96s/it]\u001b[A\n", - "[Succeeded / Failed / Skipped / Total] 5 / 7 / 8 / 20: 100%|██████████| 20/20 [49:19<00:00, 147.96s/it]" - ] - }, - { - "output_type": "stream", - "name": "stdout", - "text": [ - "--------------------------------------------- Result 20 ---------------------------------------------\n", - "[[International news (100%)]] --> [[Mainland china politics (66%)]]\n", - "\n", - "朝鲜谴责韩国前方部队打出反朝口号\n", - "\n", - "中国谴责日本前方部队打出侵略口号\n", - "\n", - "\n", - "\n", - "+-------------------------------+--------+\n", - "| Attack Results | |\n", - "+-------------------------------+--------+\n", - "| Number of successful attacks: | 5 |\n", - "| Number of failed attacks: | 7 |\n", - "| Number of skipped attacks: | 8 |\n", - "| Original accuracy: | 60.0% |\n", - "| Accuracy under attack: | 35.0% |\n", - "| Attack success rate: | 41.67% |\n", - "| Average perturbed word %: | 36.39% |\n", - "| Average num. words per input: | 9.3 |\n", - "| Avg num queries: | 45.5 |\n", - "+-------------------------------+--------+\n" - ] - }, - { - "output_type": "stream", - "name": "stderr", - "text": [ - "\n" - ] - } - ] + "output_type": "stream", + "name": "stdout", + "text": [ + "Attack(\n", + " (search_method): GreedyWordSwapWIR(\n", + " (wir_method): weighted-saliency\n", + " )\n", + " (goal_function): UntargetedClassification\n", + " (transformation): ChineseWordSwapMaskedLM\n", + " (constraints): \n", + " (0): RepeatModification\n", + " (1): StopwordModification\n", + " (is_black_box): True\n", + ") \n", + "\n" + ] }, { - "cell_type": "markdown", - "source": [ - "As aforementioned, we can also augment Chinese sentences with the provided transformation. A quick examples is shown below:" - ], - "metadata": { - "id": "3e_tQiHWS-Pb" - } + "output_type": "stream", + "name": "stderr", + "text": [ + "\n", + " 0%| | 0/20 [00:00 [[[FAILED]]]\n", + "\n", + "林书豪新秀赛上甘心\"跑龙套\" 自称仍是底薪球员\n", + "\n", + "\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\n", + "[Succeeded / Failed / Skipped / Total] 0 / 1 / 0 / 1: 10%|█ | 2/20 [06:55<1:02:18, 207.69s/it]\u001b[A\n", + "[Succeeded / Failed / Skipped / Total] 0 / 2 / 0 / 2: 10%|█ | 2/20 [06:55<1:02:18, 207.70s/it]\u001b[A" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "--------------------------------------------- Result 2 ---------------------------------------------\n", + "[[Culture (100%)]] --> [[[FAILED]]]\n", + "\n", + "成都现“真人图书馆”:无书“借人”给你读\n", + "\n", + "\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\n", + "[Succeeded / Failed / Skipped / Total] 0 / 2 / 0 / 2: 15%|█▌ | 3/20 [07:01<39:50, 140.61s/it] \u001b[A\n", + "[Succeeded / Failed / Skipped / Total] 0 / 2 / 1 / 3: 15%|█▌ | 3/20 [07:01<39:50, 140.61s/it]\u001b[A" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "--------------------------------------------- Result 3 ---------------------------------------------\n", + "[[Mainland china politics (57%)]] --> [[[SKIPPED]]]\n", + "\n", + "中国经济走向更趋稳健务实\n", + "\n", + "\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\n", + "[Succeeded / Failed / Skipped / Total] 0 / 2 / 1 / 3: 20%|██ | 4/20 [11:33<46:12, 173.28s/it]\u001b[A\n", + "[Succeeded / Failed / Skipped / Total] 0 / 3 / 1 / 4: 20%|██ | 4/20 [11:33<46:12, 173.28s/it]\u001b[A" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "--------------------------------------------- Result 4 
---------------------------------------------\n", + "[[Sports (100%)]] --> [[[FAILED]]]\n", + "\n", + "国际田联世界挑战赛 罗伯斯迎来赛季第三冠\n", + "\n", + "\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\n", + "[Succeeded / Failed / Skipped / Total] 0 / 3 / 1 / 4: 25%|██▌ | 5/20 [14:52<44:36, 178.44s/it]\u001b[A" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "--------------------------------------------- Result 5 ---------------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\n", + "[Succeeded / Failed / Skipped / Total] 1 / 3 / 1 / 5: 25%|██▌ | 5/20 [14:53<44:39, 178.62s/it]\u001b[A" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "[[International news (66%)]] --> [[Entertainment (68%)]]\n", + "\n", + "德国一电视台合成“默克尔头巾照”惹争议\n", + "\n", + "德国一电视台合成“性感头巾照”惹争议\n", + "\n", + "\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\n", + "[Succeeded / Failed / Skipped / Total] 1 / 3 / 1 / 5: 30%|███ | 6/20 [14:57<34:55, 149.65s/it]\u001b[A\n", + "[Succeeded / Failed / Skipped / Total] 1 / 3 / 2 / 6: 30%|███ | 6/20 [14:57<34:55, 149.65s/it]\u001b[A" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "--------------------------------------------- Result 6 ---------------------------------------------\n", + "[[Mainland china politics (80%)]] --> [[[SKIPPED]]]\n", + "\n", + "朴槿惠今访华 韩媒称访西安可能为增进与习近平友谊\n", + "\n", + "\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\n", + "[Succeeded / Failed / Skipped / Total] 1 / 3 / 2 / 6: 35%|███▌ | 7/20 [15:04<27:59, 129.16s/it]\u001b[A\n", + "[Succeeded / Failed / Skipped / Total] 1 / 3 / 3 / 7: 35%|███▌ | 7/20 [15:04<27:59, 129.16s/it]\u001b[A" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "--------------------------------------------- Result 7 ---------------------------------------------\n", + "[[Mainland china politics (59%)]] --> [[[SKIPPED]]]\n", + "\n", + "中国驻休斯敦总领馆举办春节招待会向华裔拜年\n", + "\n", + "\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\n", + "[Succeeded / Failed / Skipped / Total] 1 / 3 / 3 / 7: 40%|████ | 8/20 [15:08<22:43, 113.60s/it]\u001b[A\n", + "[Succeeded / Failed / Skipped / Total] 1 / 3 / 4 / 8: 40%|████ | 8/20 [15:08<22:43, 113.61s/it]\u001b[A" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "--------------------------------------------- Result 8 ---------------------------------------------\n", + "[[Culture (93%)]] --> [[[SKIPPED]]]\n", + "\n", + "NASA发现“地球兄弟” 具备生命存活条件\n", + "\n", + "\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\n", + "[Succeeded / Failed / Skipped / Total] 1 / 3 / 4 / 8: 45%|████▌ | 9/20 [15:13<18:36, 101.52s/it]\u001b[A\n", + "[Succeeded / Failed / Skipped / Total] 1 / 3 / 5 / 9: 45%|████▌ | 9/20 [15:13<18:36, 101.52s/it]\u001b[A" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "--------------------------------------------- Result 9 ---------------------------------------------\n", + "[[Culture (53%)]] --> [[[SKIPPED]]]\n", + "\n", + "儿子去世后社交网站账号停用 父亲请求保留记忆\n", + "\n", + "\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\n", + "[Succeeded / Failed / Skipped / Total] 1 / 3 / 5 / 9: 50%|█████ | 10/20 [18:20<18:20, 110.06s/it]\u001b[A\n", + "[Succeeded / Failed / Skipped / Total] 2 / 3 / 5 / 10: 50%|█████ | 10/20 [18:20<18:20, 110.06s/it]\u001b[A" + 
] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "--------------------------------------------- Result 10 ---------------------------------------------\n", + "[[Culture (100%)]] --> [[Entertainment (72%)]]\n", + "\n", + "第六届鲁迅文学奖颁发 格非等35位获奖者领奖\n", + "\n", + "第六届决赛颁发 格非等35位获奖者领奖\n", + "\n", + "\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\n", + "[Succeeded / Failed / Skipped / Total] 2 / 3 / 5 / 10: 55%|█████▌ | 11/20 [22:44<18:36, 124.02s/it]\u001b[A\n", + "[Succeeded / Failed / Skipped / Total] 3 / 3 / 5 / 11: 55%|█████▌ | 11/20 [22:44<18:36, 124.02s/it]\u001b[A" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "--------------------------------------------- Result 11 ---------------------------------------------\n", + "[[Hong kong - macau politics (96%)]] --> [[Culture (79%)]]\n", + "\n", + "东莞台商欲借“台博会”搭建内销平台\n", + "\n", + "东莞讯欲借“艺博会”搭建内销平台\n", + "\n", + "\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\n", + "[Succeeded / Failed / Skipped / Total] 3 / 3 / 5 / 11: 60%|██████ | 12/20 [22:48<15:12, 114.07s/it]\u001b[A\n", + "[Succeeded / Failed / Skipped / Total] 3 / 3 / 6 / 12: 60%|██████ | 12/20 [22:48<15:12, 114.07s/it]\u001b[A" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "--------------------------------------------- Result 12 ---------------------------------------------\n", + "[[Financial news (56%)]] --> [[[SKIPPED]]]\n", + "\n", + "日本网友买扇贝当下酒菜 发现内有真正珍珠(图)\n", + "\n", + "\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\n", + "[Succeeded / Failed / Skipped / Total] 3 / 3 / 6 / 12: 65%|██████▌ | 13/20 [28:59<15:36, 133.78s/it]\u001b[A\n", + "[Succeeded / Failed / Skipped / Total] 3 / 4 / 6 / 13: 65%|██████▌ | 13/20 [28:59<15:36, 133.78s/it]\u001b[A" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "--------------------------------------------- Result 13 ---------------------------------------------\n", + "[[Sports (100%)]] --> [[[FAILED]]]\n", + "\n", + "篮球热潮席卷张江 NBA中投王与拉拉队鼎力加盟\n", + "\n", + "\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\n", + "[Succeeded / Failed / Skipped / Total] 3 / 4 / 6 / 13: 70%|███████ | 14/20 [33:40<14:26, 144.34s/it]\u001b[A\n", + "[Succeeded / Failed / Skipped / Total] 3 / 5 / 6 / 14: 70%|███████ | 14/20 [33:40<14:26, 144.34s/it]\u001b[A" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "--------------------------------------------- Result 14 ---------------------------------------------\n", + "[[Sports (100%)]] --> [[[FAILED]]]\n", + "\n", + "UFC终极格斗冠军赛开打 \"草原狼\"遭遇三连败\n", + "\n", + "\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\n", + "[Succeeded / Failed / Skipped / Total] 3 / 5 / 6 / 14: 75%|███████▌ | 15/20 [33:45<11:15, 135.04s/it]\u001b[A\n", + "[Succeeded / Failed / Skipped / Total] 3 / 5 / 7 / 15: 75%|███████▌ | 15/20 [33:45<11:15, 135.04s/it]\u001b[A" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "--------------------------------------------- Result 15 ---------------------------------------------\n", + "[[Culture (92%)]] --> [[[SKIPPED]]]\n", + "\n", + "水果style:心形水果惹人爱 骰子西瓜乐趣多(图)\n", + "\n", + "\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\n", + "[Succeeded / Failed / Skipped / Total] 3 / 5 / 7 / 15: 80%|████████ | 16/20 [40:09<10:02, 150.60s/it]\u001b[A\n", + "[Succeeded / Failed / Skipped / Total] 3 
/ 6 / 7 / 16: 80%|████████ | 16/20 [40:09<10:02, 150.60s/it]\u001b[A" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "--------------------------------------------- Result 16 ---------------------------------------------\n", + "[[Sports (100%)]] --> [[[FAILED]]]\n", + "\n", + "同里杯中国天元赛前瞻:芈昱廷李钦诚争挑战权\n", + "\n", + "\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\n", + "[Succeeded / Failed / Skipped / Total] 3 / 6 / 7 / 16: 85%|████████▌ | 17/20 [43:32<07:41, 153.67s/it]\u001b[A\n", + "[Succeeded / Failed / Skipped / Total] 4 / 6 / 7 / 17: 85%|████████▌ | 17/20 [43:32<07:41, 153.67s/it]\u001b[A" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "--------------------------------------------- Result 17 ---------------------------------------------\n", + "[[Entertainment (100%)]] --> [[Financial news (99%)]]\n", + "\n", + "桂纶镁为戏体验生活 东北洗衣店当店员\n", + "\n", + "桂纶品牌为首体验生活 东北洗衣店当家\n", + "\n", + "\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\n", + "[Succeeded / Failed / Skipped / Total] 4 / 6 / 7 / 17: 90%|█████████ | 18/20 [44:01<04:53, 146.75s/it]\u001b[A\n", + "[Succeeded / Failed / Skipped / Total] 4 / 7 / 7 / 18: 90%|█████████ | 18/20 [44:01<04:53, 146.75s/it]\u001b[A" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "--------------------------------------------- Result 18 ---------------------------------------------\n", + "[[Culture (95%)]] --> [[[FAILED]]]\n", + "\n", + "河南羲皇故都朝祖会流传6000年 一天游客80万人\n", + "\n", + "\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\n", + "[Succeeded / Failed / Skipped / Total] 4 / 7 / 7 / 18: 95%|█████████▌| 19/20 [44:07<02:19, 139.35s/it]\u001b[A\n", + "[Succeeded / Failed / Skipped / Total] 4 / 7 / 8 / 19: 95%|█████████▌| 19/20 [44:07<02:19, 139.35s/it]\u001b[A" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "--------------------------------------------- Result 19 ---------------------------------------------\n", + "[[Culture (92%)]] --> [[[SKIPPED]]]\n", + "\n", + "辛柏青谈追求妻子:用1袋洗衣粉、2块肥皂打动她的\n", + "\n", + "\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\n", + "[Succeeded / Failed / Skipped / Total] 4 / 7 / 8 / 19: 100%|██████████| 20/20 [49:19<00:00, 147.96s/it]\u001b[A\n", + "[Succeeded / Failed / Skipped / Total] 5 / 7 / 8 / 20: 100%|██████████| 20/20 [49:19<00:00, 147.96s/it]" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "--------------------------------------------- Result 20 ---------------------------------------------\n", + "[[International news (100%)]] --> [[Mainland china politics (66%)]]\n", + "\n", + "朝鲜谴责韩国前方部队打出反朝口号\n", + "\n", + "中国谴责日本前方部队打出侵略口号\n", + "\n", + "\n", + "\n", + "+-------------------------------+--------+\n", + "| Attack Results | |\n", + "+-------------------------------+--------+\n", + "| Number of successful attacks: | 5 |\n", + "| Number of failed attacks: | 7 |\n", + "| Number of skipped attacks: | 8 |\n", + "| Original accuracy: | 60.0% |\n", + "| Accuracy under attack: | 35.0% |\n", + "| Attack success rate: | 41.67% |\n", + "| Average perturbed word %: | 36.39% |\n", + "| Average num. 
words per input: | 9.3 |\n", + "| Avg num queries: | 45.5 |\n", + "+-------------------------------+--------+\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "As aforementioned, we can also augment Chinese sentences with the provided transformation. A quick examples is shown below:" + ], + "metadata": { + "id": "3e_tQiHWS-Pb" + } + }, + { + "cell_type": "code", + "source": [ + "from textattack.constraints.pre_transformation import RepeatModification\n", + "from textattack.constraints.pre_transformation import StopwordModification\n", + "from textattack.augmentation import Augmenter\n", + "\n", + "# transformation\n", + "transformation = ChineseMorphonymCharacterSwap()\n", + "\n", + "# constraints\n", + "constraints = [RepeatModification(), StopwordModification()]\n", + "\n", + "# Create augmenter with specified parameters\n", + "augmenter = Augmenter(\n", + " transformation=transformation, pct_words_to_swap=0.1, transformations_per_example=2\n", + ")\n", + "s = \"听见树林的呢喃,发现溪流中的知识。\"\n", + "\n", + "# Augment!\n", + "augmenter.augment(s)" + ], + "metadata": { + "id": "43MCRE0pqVM0", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "2ad12bf5-3bd8-4c8d-913c-949fcae787d3" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "Building prefix dict from the default dictionary ...\n", + "DEBUG:jieba:Building prefix dict from the default dictionary ...\n", + "Dumping model to file cache /tmp/jieba.cache\n", + "DEBUG:jieba:Dumping model to file cache /tmp/jieba.cache\n", + "Loading model cost 0.888 seconds.\n", + "DEBUG:jieba:Loading model cost 0.888 seconds.\n", + "Prefix dict has been built successfully.\n", + "DEBUG:jieba:Prefix dict has been built successfully.\n" + ] + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "['听见树林的呢喃,发现溪流中的知织。', '听见树林的呢喃,发视溪流中的知识。']" ] + }, + "metadata": {}, + "execution_count": 11 } - ] + ] + } + ] } \ No newline at end of file diff --git a/textattack/models/wrappers/pyabsa_model_wrapper.py b/textattack/models/wrappers/pyabsa_model_wrapper.py index 001b5bc34..9e9166688 100644 --- a/textattack/models/wrappers/pyabsa_model_wrapper.py +++ b/textattack/models/wrappers/pyabsa_model_wrapper.py @@ -9,7 +9,7 @@ class TADModelWrapper(HuggingFaceModelWrapper): """Transformers sentiment analysis pipeline returns a list of responses - like + like. [{'label': 'POSITIVE', 'score': 0.7817379832267761}] diff --git a/textattack/reactive_defense/tad_reactive_defender.py b/textattack/reactive_defense/tad_reactive_defender.py index f2f6ea81b..195e31173 100644 --- a/textattack/reactive_defense/tad_reactive_defender.py +++ b/textattack/reactive_defense/tad_reactive_defender.py @@ -14,7 +14,7 @@ class TADReactiveDefender(ReactiveDefender): """Transformers sentiment analysis pipeline returns a list of responses - like + like. [{'label': 'POSITIVE', 'score': 0.7817379832267761}]