From 53e9accb414ddabacbea9b590aff6feed39e5196 Mon Sep 17 00:00:00 2001 From: Jack Morris Date: Tue, 1 Nov 2022 11:26:14 -0400 Subject: [PATCH 1/7] update for t5 --- textattack/datasets/helpers/ted_multi.py | 14 +++++++++++--- .../text/text_to_text_goal_function.py | 6 +++++- textattack/models/tokenizers/t5_tokenizer.py | 4 ++-- 3 files changed, 18 insertions(+), 6 deletions(-) diff --git a/textattack/datasets/helpers/ted_multi.py b/textattack/datasets/helpers/ted_multi.py index 616a2e805..39574019c 100644 --- a/textattack/datasets/helpers/ted_multi.py +++ b/textattack/datasets/helpers/ted_multi.py @@ -11,6 +11,7 @@ import numpy as np from textattack.datasets import HuggingFaceDataset +from textattack.datasets.huggingface_dataset import get_datasets_dataset_columns class TedMultiTranslationDataset(HuggingFaceDataset): @@ -35,12 +36,19 @@ def __init__(self, source_lang="en", target_lang="de", split="test", shuffle=Fal self.source_lang = source_lang self.target_lang = target_lang self.shuffled = shuffle + self.label_map = None + self.output_scale_factor = None + self.label_names = None + # self.input_columns = ("Source",) + # self.output_column = "Translation" + if shuffle: self._dataset.shuffle() - def _format_raw_example(self, raw_example): - translations = np.array(raw_example["translation"]) - languages = np.array(raw_example["language"]) + def _format_as_dict(self, raw_example): + example = raw_example["translations"] + translations = np.array(example["translation"]) + languages = np.array(example["language"]) source = translations[languages == self.source_lang][0] target = translations[languages == self.target_lang][0] source_dict = collections.OrderedDict([("Source", source)]) diff --git a/textattack/goal_functions/text/text_to_text_goal_function.py b/textattack/goal_functions/text/text_to_text_goal_function.py index 9e4bac3be..341140768 100644 --- a/textattack/goal_functions/text/text_to_text_goal_function.py +++ 
b/textattack/goal_functions/text/text_to_text_goal_function.py @@ -4,6 +4,7 @@ ------------------------------------------------------- """ +import numpy as np from textattack.goal_function_results import TextToTextGoalFunctionResult from textattack.goal_functions import GoalFunction @@ -22,7 +23,10 @@ def _goal_function_result_type(self): def _process_model_outputs(self, _, outputs): """Processes and validates a list of model outputs.""" - return outputs.flatten() + if isinstance(outputs, np.ndarray): + return outputs.flatten() + else: + return outputs def _get_displayed_output(self, raw_output): return raw_output diff --git a/textattack/models/tokenizers/t5_tokenizer.py b/textattack/models/tokenizers/t5_tokenizer.py index a252e9134..f90aa04c4 100644 --- a/textattack/models/tokenizers/t5_tokenizer.py +++ b/textattack/models/tokenizers/t5_tokenizer.py @@ -38,7 +38,7 @@ def __init__(self, mode="english_to_german", max_length=64): self.tokenizer = transformers.AutoTokenizer.from_pretrained( "t5-base", use_fast=True ) - self.max_length = max_length + self.model_max_length = max_length def __call__(self, text, *args, **kwargs): """ @@ -55,7 +55,7 @@ def __call__(self, text, *args, **kwargs): else: for i in range(len(text)): text[i] = self.tokenization_prefix + text[i] - return self.tokenizer(text, *args, max_length=self.max_length, **kwargs) + return self.tokenizer(text, *args, **kwargs) def decode(self, ids): """Converts IDs (typically generated by the model) back to a string.""" From 1754b6a5e26db5f52e26db75cf5f3d6a4b22eb68 Mon Sep 17 00:00:00 2001 From: Jack Morris Date: Wed, 2 Nov 2022 14:21:03 -0400 Subject: [PATCH 2/7] remove unnecessary import --- textattack/datasets/helpers/ted_multi.py | 1 - 1 file changed, 1 deletion(-) diff --git a/textattack/datasets/helpers/ted_multi.py b/textattack/datasets/helpers/ted_multi.py index 39574019c..9e36c2694 100644 --- a/textattack/datasets/helpers/ted_multi.py +++ b/textattack/datasets/helpers/ted_multi.py @@ -11,7 +11,6 @@ 
import numpy as np from textattack.datasets import HuggingFaceDataset -from textattack.datasets.huggingface_dataset import get_datasets_dataset_columns class TedMultiTranslationDataset(HuggingFaceDataset): From a3b36b56fb1d398695f6d0c413f712ab5a6f4442 Mon Sep 17 00:00:00 2001 From: Jack Morris Date: Wed, 2 Nov 2022 15:38:44 -0400 Subject: [PATCH 3/7] v0.3.8 --- docs/conf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/conf.py b/docs/conf.py index f789e2760..aa57069d8 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -21,7 +21,7 @@ author = "UVA QData Lab" # The full version, including alpha/beta/rc tags -release = "0.3.7" +release = "0.3.8" # Set master doc to `index.rst`. master_doc = "index" From a3394d69dec191917bc6beed7ea1a91a5bd1efab Mon Sep 17 00:00:00 2001 From: plasmashen Date: Tue, 13 Dec 2022 16:36:15 +0800 Subject: [PATCH 4/7] fix text output when using T5 model --- textattack/goal_functions/goal_function.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/textattack/goal_functions/goal_function.py b/textattack/goal_functions/goal_function.py index 16f498301..78693f670 100644 --- a/textattack/goal_functions/goal_function.py +++ b/textattack/goal_functions/goal_function.py @@ -176,13 +176,15 @@ def _call_model_uncached(self, attacked_text_list): if isinstance(batch_preds, list): outputs.extend(batch_preds) elif isinstance(batch_preds, np.ndarray): - outputs.append(torch.tensor(batch_preds)) + outputs.append(batch_preds) else: outputs.append(batch_preds) i += self.batch_size if isinstance(outputs[0], torch.Tensor): outputs = torch.cat(outputs, dim=0) + elif isinstance(outputs[0], np.ndarray): + outputs = np.concatenate(outputs).ravel() assert len(inputs) == len( outputs From 6554d6c365e7f0a5fb58a806befc43ac97fea8c9 Mon Sep 17 00:00:00 2001 From: Jack Morris Date: Sun, 6 Nov 2022 10:35:41 -0500 Subject: [PATCH 5/7] fix command help str :-) --- textattack/commands/textattack_cli.py | 2 +- 1 file changed, 1 
insertion(+), 1 deletion(-) diff --git a/textattack/commands/textattack_cli.py b/textattack/commands/textattack_cli.py index 5e5073f7d..219d6500c 100644 --- a/textattack/commands/textattack_cli.py +++ b/textattack/commands/textattack_cli.py @@ -22,7 +22,7 @@ def main(): parser = argparse.ArgumentParser( "TextAttack CLI", - usage="[python -m] texattack []", + usage="[python -m] textattack []", formatter_class=argparse.ArgumentDefaultsHelpFormatter, ) subparsers = parser.add_subparsers(help="textattack command helpers") From a40f5e3ef2bff728fc3b793b55746114dfcb79fc Mon Sep 17 00:00:00 2001 From: Alex McKenzie Date: Mon, 28 Nov 2022 11:52:44 +0100 Subject: [PATCH 6/7] Fix links in embedded HTML table Markdown links don't work inside HTML tables in markdown --- docs/3recipes/attack_recipes_cmd.md | 50 ++++++++++++++--------------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/docs/3recipes/attack_recipes_cmd.md b/docs/3recipes/attack_recipes_cmd.md index 9bd7d5c8d..038ebb113 100644 --- a/docs/3recipes/attack_recipes_cmd.md +++ b/docs/3recipes/attack_recipes_cmd.md @@ -1,40 +1,40 @@ # Attack Recipes CommandLine Use -We provide a number of pre-built attack recipes, which correspond to attacks from the literature. +We provide a number of pre-built attack recipes, which correspond to attacks from the literature. ## Help: `textattack --help` TextAttack's main features can all be accessed via the `textattack` command. Two very common commands are `textattack attack `, and `textattack augment `. You can see more -information about all commands using +information about all commands using ```bash -textattack --help +textattack --help ``` or a specific command using, for example, ```bash textattack attack --help ``` -The [`examples/`](https://github.com/QData/TextAttack/tree/master/examples) folder includes scripts showing common TextAttack usage for training models, running attacks, and augmenting a CSV file. 
+The [`examples/`](https://github.com/QData/TextAttack/tree/master/examples) folder includes scripts showing common TextAttack usage for training models, running attacks, and augmenting a CSV file. The [documentation website](https://textattack.readthedocs.io/en/latest) contains walkthroughs explaining basic usage of TextAttack, including building a custom transformation and a custom constraint.. ## Running Attacks: `textattack attack --help` -The easiest way to try out an attack is via the command-line interface, `textattack attack`. +The easiest way to try out an attack is via the command-line interface, `textattack attack`. > **Tip:** If your machine has multiple GPUs, you can distribute the attack across them using the `--parallel` option. For some attacks, this can really help performance. Here are some concrete examples: -*TextFooler on BERT trained on the MR sentiment classification dataset*: +*TextFooler on BERT trained on the MR sentiment classification dataset*: ```bash textattack attack --recipe textfooler --model bert-base-uncased-mr --num-examples 100 ``` -*DeepWordBug on DistilBERT trained on the Quora Question Pairs paraphrase identification dataset*: +*DeepWordBug on DistilBERT trained on the Quora Question Pairs paraphrase identification dataset*: ```bash textattack attack --model distilbert-base-uncased-cola --recipe deepwordbug --num-examples 100 ``` @@ -76,7 +76,7 @@ To run an attack recipe: `textattack attack --recipe [recipe_name]` Percentage of words perturbed, Language Model perplexity, Word embedding distance Counter-fitted word embedding swap Genetic Algorithm -from (["Generating Natural Language Adversarial Examples" (Alzantot et al., 2018)](https://arxiv.org/abs/1804.07998)) +from "Generating Natural Language Adversarial Examples" (Alzantot et al., 2018) bae @@ -84,7 +84,7 @@ To run an attack recipe: `textattack attack --recipe [recipe_name]` USE sentence encoding cosine similarity BERT Masked Token Prediction Greedy-WIR -BERT masked
language model transformation attack from (["BAE: BERT-based Adversarial Examples for Text Classification" (Garg & Ramakrishnan, 2019)](https://arxiv.org/abs/2004.01970)). +BERT masked language model transformation attack from "BAE: BERT-based Adversarial Examples for Text Classification" (Garg & Ramakrishnan, 2019). bert-attack @@ -92,7 +92,7 @@ To run an attack recipe: `textattack attack --recipe [recipe_name]` USE sentence encoding cosine similarity, Maximum number of words perturbed BERT Masked Token Prediction (with subword expansion) Greedy-WIR - (["BERT-ATTACK: Adversarial Attack Against BERT Using BERT" (Li et al., 2020)](https://arxiv.org/abs/2004.09984)) + "BERT-ATTACK: Adversarial Attack Against BERT Using BERT" (Li et al., 2020) checklist @@ -100,7 +100,7 @@ To run an attack recipe: `textattack attack --recipe [recipe_name]` checklist distance contract, extend, and substitutes name entities Greedy-WIR -Invariance testing implemented in CheckList . (["Beyond Accuracy: Behavioral Testing of NLP models with CheckList" (Ribeiro et al., 2020)](https://arxiv.org/abs/2005.04118)) +Invariance testing implemented in CheckList. 
"Beyond Accuracy: Behavioral Testing of NLP models with CheckList" (Ribeiro et al., 2020) clare @@ -108,7 +108,7 @@ To run an attack recipe: `textattack attack --recipe [recipe_name]` USE sentence encoding cosine similarity RoBERTa Masked Prediction for token swap, insert and merge Greedy -["Contextualized Perturbation for Textual Adversarial Attack" (Li et al., 2020)](https://arxiv.org/abs/2009.07502)) +"Contextualized Perturbation for Textual Adversarial Attack" (Li et al., 2020) deepwordbug @@ -116,7 +116,7 @@ To run an attack recipe: `textattack attack --recipe [recipe_name]` Levenshtein edit distance {Character Insertion, Character Deletion, Neighboring Character Swap, Character Substitution} Greedy-WIR -Greedy replace-1 scoring and multi-transformation character-swap attack (["Black-box Generation of Adversarial Text Sequences to Evade Deep Learning Classifiers" (Gao et al., 2018)](https://arxiv.org/abs/1801.04354) +Greedy replace-1 scoring and multi-transformation character-swap attack, from "Black-box Generation of Adversarial Text Sequences to Evade Deep Learning Classifiers" (Gao et al., 2018) faster-alzantot @@ -124,7 +124,7 @@ To run an attack recipe: `textattack attack --recipe [recipe_name]` Percentage of words perturbed, Language Model perplexity, Word embedding distance Counter-fitted word embedding swap Genetic Algorithm -Modified, faster version of the Alzantot et al. genetic algorithm, from (["Certified Robustness to Adversarial Word Substitutions" (Jia et al., 2019)](https://arxiv.org/abs/1909.00986)) +Modified, faster version of the Alzantot et al. 
genetic algorithm, from "Certified Robustness to Adversarial Word Substitutions" (Jia et al., 2019) hotflip (word swap) @@ -132,7 +132,7 @@ To run an attack recipe: `textattack attack --recipe [recipe_name]` Word Embedding Cosine Similarity, Part-of-speech match, Number of words perturbed Gradient-Based Word Swap Beam search - (["HotFlip: White-Box Adversarial Examples for Text Classification" (Ebrahimi et al., 2017)](https://arxiv.org/abs/1712.06751)) +from "HotFlip: White-Box Adversarial Examples for Text Classification" (Ebrahimi et al., 2017) iga @@ -140,7 +140,7 @@ To run an attack recipe: `textattack attack --recipe [recipe_name]` Percentage of words perturbed, Word embedding distance Counter-fitted word embedding swap Genetic Algorithm -Improved genetic algorithm -based word substitution from (["Natural Language Adversarial Attacks and Defenses in Word Level (Wang et al., 2019)"](https://arxiv.org/abs/1909.06723) +Improved genetic algorithm-based word substitution, from "Natural Language Adversarial Attacks and Defenses in Word Level" (Wang et al., 2019) input-reduction @@ -148,7 +148,7 @@ To run an attack recipe: `textattack attack --recipe [recipe_name]` Word deletion Greedy-WIR -Greedy attack with word importance ranking , Reducing the input while maintaining the prediction through word importance ranking (["Pathologies of Neural Models Make Interpretation Difficult" (Feng et al., 2018)](https://arxiv.org/pdf/1804.07781.pdf)) +Greedy attack with word importance ranking, reducing the input while maintaining the prediction through word importance ranking, from "Pathologies of Neural Models Make Interpretation Difficult" (Feng et al., 2018) kuleshov @@ -156,7 +156,7 @@ To run an attack recipe: `textattack attack --recipe [recipe_name]` Thought vector encoding cosine similarity, Language model similarity probability Counter-fitted word embedding swap Greedy word swap -(["Adversarial Examples for Natural Language Classification Problems" (Kuleshov et al.,
2018)](https://openreview.net/pdf?id=r1QZ3zbAZ)) +From "Adversarial Examples for Natural Language Classification Problems" (Kuleshov et al., 2018) pruthi @@ -164,7 +164,7 @@ To run an attack recipe: `textattack attack --recipe [recipe_name]` Minimum word length, Maximum number of words perturbed {Neighboring Character Swap, Character Deletion, Character Insertion, Keyboard-Based Character Swap} Greedy search -simulates common typos (["Combating Adversarial Misspellings with Robust Word Recognition" (Pruthi et al., 2019)](https://arxiv.org/abs/1905.11268) +simulates common typos, from "Combating Adversarial Misspellings with Robust Word Recognition" (Pruthi et al., 2019) pso @@ -172,7 +172,7 @@ To run an attack recipe: `textattack attack --recipe [recipe_name]` HowNet Word Swap Particle Swarm Optimization -(["Word-level Textual Adversarial Attacking as Combinatorial Optimization" (Zang et al., 2020)](https://www.aclweb.org/anthology/2020.acl-main.540/)) +From "Word-level Textual Adversarial Attacking as Combinatorial Optimization" (Zang et al., 2020) pwws @@ -180,7 +180,7 @@ To run an attack recipe: `textattack attack --recipe [recipe_name]` WordNet-based synonym swap Greedy-WIR (saliency) -Greedy attack with word importance ranking based on word saliency and synonym swap scores (["Generating Natural Language Adversarial Examples through Probability Weighted Word Saliency" (Ren et al., 2019)](https://www.aclweb.org/anthology/P19-1103/)) +Greedy attack with word importance ranking based on word saliency and synonym swap scores, from "Generating Natural Language Adversarial Examples through Probability Weighted Word Saliency" (Ren et al., 2019) textbugger : (black-box) @@ -188,7 +188,7 @@ To run an attack recipe: `textattack attack --recipe [recipe_name]` USE sentence encoding cosine similarity {Character Insertion, Character Deletion, Neighboring Character Swap, Character Substitution} Greedy-WIR -([(["TextBugger: Generating Adversarial Text Against Real-world
Applications" (Li et al., 2018)](https://arxiv.org/abs/1812.05271)). +From "TextBugger: Generating Adversarial Text Against Real-world Applications" (Li et al., 2018) textfooler @@ -196,7 +196,7 @@ To run an attack recipe: `textattack attack --recipe [recipe_name]` Word Embedding Distance, Part-of-speech match, USE sentence encoding cosine similarity Counter-fitted word embedding swap Greedy-WIR -Greedy attack with word importance ranking (["Is Bert Really Robust?" (Jin et al., 2019)](https://arxiv.org/abs/1907.11932)) +Greedy attack with word importance ranking, from "Is Bert Really Robust?" (Jin et al., 2019)
Attacks on sequence-to-sequence models:
@@ -207,7 +207,7 @@ To run an attack recipe: `textattack attack --recipe [recipe_name]` Inflection Word Swap Greedy search -Greedy to replace words with their inflections with the goal of minimizing BLEU score (["It’s Morphin’ Time! Combating Linguistic Discrimination with Inflectional Perturbations"](https://www.aclweb.org/anthology/2020.acl-main.263.pdf) +Greedy to replace words with their inflections with the goal of minimizing BLEU score, from "It’s Morphin’ Time! Combating Linguistic Discrimination with Inflectional Perturbations" @@ -217,7 +217,7 @@ To run an attack recipe: `textattack attack --recipe [recipe_name]` Counter-fitted word embedding swap Greedy-WIR -Greedy attack with goal of changing every word in the output translation. Currently implemented as black-box with plans to change to white-box as done in paper (["Seq2Sick: Evaluating the Robustness of Sequence-to-Sequence Models with Adversarial Examples" (Cheng et al., 2018)](https://arxiv.org/abs/1803.01128)) +Greedy attack with goal of changing every word in the output translation. 
Currently implemented as black-box with plans to change to white-box as done in paper, from "Seq2Sick: Evaluating the Robustness of Sequence-to-Sequence Models with Adversarial Examples" (Cheng et al., 2018) From 9dce2e70b3658cf9f36b1e53a63287faaef05e8f Mon Sep 17 00:00:00 2001 From: Alex McKenzie Date: Mon, 28 Nov 2022 11:57:44 +0100 Subject: [PATCH 7/7] Add author & publish date to Its Morphin Time --- docs/3recipes/attack_recipes_cmd.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/3recipes/attack_recipes_cmd.md b/docs/3recipes/attack_recipes_cmd.md index 038ebb113..f38527a1c 100644 --- a/docs/3recipes/attack_recipes_cmd.md +++ b/docs/3recipes/attack_recipes_cmd.md @@ -207,7 +207,7 @@ To run an attack recipe: `textattack attack --recipe [recipe_name]` Inflection Word Swap Greedy search -Greedy to replace words with their inflections with the goal of minimizing BLEU score, from "It’s Morphin’ Time! Combating Linguistic Discrimination with Inflectional Perturbations" +Greedy to replace words with their inflections with the goal of minimizing BLEU score, from "It’s Morphin’ Time! Combating Linguistic Discrimination with Inflectional Perturbations" (Tan et al., 2020)