From 51191bb099b937c766e0acdf48be1494bcba77a2 Mon Sep 17 00:00:00 2001 From: Yanjun Qi Date: Fri, 29 Sep 2023 23:33:43 -0400 Subject: [PATCH 1/5] fix the local install command in two readme md --- CONTRIBUTING.md | 2 +- docs/1start/support.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 398884a15..458cd5435 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -117,7 +117,7 @@ Follow these steps to start contributing: ```bash $ cd TextAttack - $ pip install -e . ".[dev]" + $ pip install -e .[dev] $ pip install black docformatter isort pytest pytest-xdist ``` diff --git a/docs/1start/support.md b/docs/1start/support.md index f3303411f..e7c1d63d6 100644 --- a/docs/1start/support.md +++ b/docs/1start/support.md @@ -121,7 +121,7 @@ Follow these steps to start contributing: ```bash $ cd TextAttack - $ pip install -e . ".[dev]" + $ pip install -e .[dev] $ pip install black isort pytest pytest-xdist ``` From 12f3de073ba30fb2e466ade21296a573e566f85f Mon Sep 17 00:00:00 2001 From: Yanjun Qi Date: Sat, 30 Sep 2023 10:00:21 -0400 Subject: [PATCH 2/5] format plus read md minor updates --- README.md | 1 + docs/0_get_started/installation.md | 2 +- docs/1start/FAQ.md | 2 +- docs/3recipes/augmenter_recipes_cmd.md | 1 + tests/test_command_line/test_attack.py | 5 +--- tests/test_command_line/test_loggers.py | 5 +--- tests/test_metric_api.py | 4 +++ textattack/__init__.py | 3 +- textattack/attack_args.py | 5 +++- textattack/attack_recipes/clare_li_2020.py | 3 +- .../attack_recipes/deepwordbug_gao_2018.py | 4 +-- .../attack_recipes/morpheus_tan_2020.py | 3 +- textattack/attack_recipes/pwws_ren_2019.py | 11 +++---- .../attack_recipes/textfooler_jin_2019.py | 3 +- textattack/augmentation/recipes.py | 3 +- .../google_language_model/lm_data_utils.py | 2 -- .../learning_to_write/learning_to_write.py | 11 ++++--- .../pre_transformation/min_word_length.py | 3 +- .../semantics/sentence_encoders/bert/bert.py | 4 ++- textattack/metrics/metric.py | 2 +- textattack/metrics/recipe.py | 30 +++++++++++++++++++ .../models/helpers/glove_embedding_layer.py | 6 ++-- textattack/models/wrappers/model_wrapper.py | 9 +++--- textattack/shared/attacked_text.py | 1 - textattack/shared/utils/strings.py | 4 +-- .../word_swaps/word_swap_change_number.py | 2 +- 26 files changed, 84 insertions(+), 45 deletions(-) create mode 100644 textattack/metrics/recipe.py diff --git a/README.md b/README.md index 4ef696cdb..c05e20263 100644 --- a/README.md +++ b/README.md @@ -319,6 +319,7 @@ for data augmentation: - `eda` augments text with a combination of word insertions, substitutions and deletions. - `checklist` augments text by contraction/extension and by substituting names, locations, numbers. - `clare` augments text by replacing, inserting, and merging with a pre-trained masked language model. +- `back_trans` augments text by backtranslation approach. #### Augmentation Command-Line Interface diff --git a/docs/0_get_started/installation.md b/docs/0_get_started/installation.md index 1c24a6906..911c1398f 100644 --- a/docs/0_get_started/installation.md +++ b/docs/0_get_started/installation.md @@ -67,7 +67,7 @@ Besides, we highly recommend you to use virtual environment for textattack use, see [information here](https://conda.io/projects/conda/en/latest/user-guide/tasks/manage-environments.html#removing-an-environment). Here is one conda example: ```bash -conda create -n textattackenv python=3.7 +conda create -n textattackenv python=3.8 conda activate textattackenv conda env list ``` diff --git a/docs/1start/FAQ.md b/docs/1start/FAQ.md index f4a14f3b9..adc38188c 100644 --- a/docs/1start/FAQ.md +++ b/docs/1start/FAQ.md @@ -43,7 +43,7 @@ Besides, we highly recommend you to use virtual environment for textattack use, see [information here](https://conda.io/projects/conda/en/latest/user-guide/tasks/manage-environments.html#removing-an-environment). Here is one conda example: ```bash -conda create -n textattackenv python=3.7 +conda create -n textattackenv python=3.8 conda activate textattackenv conda env list ``` diff --git a/docs/3recipes/augmenter_recipes_cmd.md b/docs/3recipes/augmenter_recipes_cmd.md index c1d496143..bde5b3116 100644 --- a/docs/3recipes/augmenter_recipes_cmd.md +++ b/docs/3recipes/augmenter_recipes_cmd.md @@ -18,6 +18,7 @@ for data augmentation: - `eda` augments text with a combination of word insertions, substitutions and deletions. - `checklist` augments text by contraction/extension and by substituting names, locations, numbers. - `clare` augments text by replacing, inserting, and merging with a pre-trained masked language model. +- `back_trans` augments text by backtranslation method. ### Augmentation Command-Line Interface diff --git a/tests/test_command_line/test_attack.py b/tests/test_command_line/test_attack.py index 83ea05d80..eaaa9310b 100644 --- a/tests/test_command_line/test_attack.py +++ b/tests/test_command_line/test_attack.py @@ -5,10 +5,7 @@ import pytest DEBUG = False - -""" -Attack command-line tests in the format (name, args, sample_output_file) -""" +"""Attack command-line tests in the format (name, args, sample_output_file)""" attack_test_params = [ # diff --git a/tests/test_command_line/test_loggers.py b/tests/test_command_line/test_loggers.py index c6589f60a..62a061cb0 100644 --- a/tests/test_command_line/test_loggers.py +++ b/tests/test_command_line/test_loggers.py @@ -5,10 +5,7 @@ import pytest DEBUG = False - -""" -Attack command-line tests in the format (name, args, sample_output_file) -""" +"""Attack command-line tests in the format (name, args, sample_output_file)""" """ list_test_params data structure requires diff --git a/tests/test_metric_api.py b/tests/test_metric_api.py index 1e75e815e..519139a1a 100644 --- a/tests/test_metric_api.py +++ b/tests/test_metric_api.py @@ -31,6 +31,7 @@ def test_use(): from textattack.attack_recipes import DeepWordBugGao2018 from textattack.datasets import HuggingFaceDataset from textattack.metrics.quality_metrics import USEMetric + from textattack.metrics.recipe import AdvancedAttackMetric from textattack.models.wrappers import HuggingFaceModelWrapper model = transformers.AutoModelForSequenceClassification.from_pretrained( @@ -56,3 +57,6 @@ def test_use(): usem = USEMetric().calculate(results) assert usem["avg_attack_use_score"] == 0.76 + + adv_score = AdvancedAttackMetric(["use", "perplexity"]).calculate(results) + assert adv_score["use"]["avg_attack_use_score"] == 0.76 diff --git a/textattack/__init__.py b/textattack/__init__.py index a169173eb..306294062 100644 --- a/textattack/__init__.py +++ b/textattack/__init__.py @@ -2,7 +2,8 @@ What is TextAttack? -`TextAttack `__ is a Python framework for adversarial attacks, adversarial training, and data augmentation in NLP. +`TextAttack `__ +is a Python framework for adversarial attacks, adversarial training, and data augmentation in NLP. TextAttack makes experimenting with the robustness of NLP models seamless, fast, and easy. It's also useful for NLP model training, adversarial training, and data augmentation. diff --git a/textattack/attack_args.py b/textattack/attack_args.py index 0491ea17e..38a7bd25d 100644 --- a/textattack/attack_args.py +++ b/textattack/attack_args.py @@ -128,6 +128,7 @@ @dataclass class AttackArgs: """Attack arguments to be passed to :class:`~textattack.Attacker`. + Args: num_examples (:obj:`int`, 'optional`, defaults to :obj:`10`): The number of examples to attack. :obj:`-1` for entire dataset. @@ -464,7 +465,9 @@ def create_loggers_from_args(cls, args): @dataclass class _CommandLineAttackArgs: - """Attack args for command line execution. This requires more arguments to + """Attack args for command line execution. + + This requires more arguments to create ``Attack`` object as specified. Args: transformation (:obj:`str`, `optional`, defaults to :obj:`"word-swap-embedding"`): diff --git a/textattack/attack_recipes/clare_li_2020.py b/textattack/attack_recipes/clare_li_2020.py index e183b20e0..6971b0489 100644 --- a/textattack/attack_recipes/clare_li_2020.py +++ b/textattack/attack_recipes/clare_li_2020.py @@ -29,7 +29,8 @@ class CLARE2020(AttackRecipe): """Li, Zhang, Peng, Chen, Brockett, Sun, Dolan. - "Contextualized Perturbation for Textual Adversarial Attack" (Li et al., 2020) + "Contextualized Perturbation for Textual Adversarial Attack" (Li et + al., 2020) https://arxiv.org/abs/2009.07502 diff --git a/textattack/attack_recipes/deepwordbug_gao_2018.py b/textattack/attack_recipes/deepwordbug_gao_2018.py index ff44f090a..dd1fe940c 100644 --- a/textattack/attack_recipes/deepwordbug_gao_2018.py +++ b/textattack/attack_recipes/deepwordbug_gao_2018.py @@ -28,8 +28,8 @@ class DeepWordBugGao2018(AttackRecipe): """Gao, Lanchantin, Soffa, Qi. - Black-box Generation of Adversarial Text Sequences to Evade Deep Learning - Classifiers. + Black-box Generation of Adversarial Text Sequences to Evade Deep + Learning Classifiers. https://arxiv.org/abs/1801.04354 """ diff --git a/textattack/attack_recipes/morpheus_tan_2020.py b/textattack/attack_recipes/morpheus_tan_2020.py index b98360a53..74b61f04d 100644 --- a/textattack/attack_recipes/morpheus_tan_2020.py +++ b/textattack/attack_recipes/morpheus_tan_2020.py @@ -20,7 +20,8 @@ class MorpheusTan2020(AttackRecipe): """Samson Tan, Shafiq Joty, Min-Yen Kan, Richard Socher. - It’s Morphin’ Time! Combating Linguistic Discrimination with Inflectional Perturbations + It’s Morphin’ Time! Combating Linguistic Discrimination with + Inflectional Perturbations https://www.aclweb.org/anthology/2020.acl-main.263/ """ diff --git a/textattack/attack_recipes/pwws_ren_2019.py b/textattack/attack_recipes/pwws_ren_2019.py index b53fd0930..bf544c302 100644 --- a/textattack/attack_recipes/pwws_ren_2019.py +++ b/textattack/attack_recipes/pwws_ren_2019.py @@ -23,11 +23,12 @@ class PWWSRen2019(AttackRecipe): Natural Language Adversarial Examples through Probability Weighted Word Saliency", Ren et al., 2019. - Words are prioritized for a synonym-swap transformation based on - a combination of their saliency score and maximum word-swap effectiveness. - Note that this implementation does not include the Named - Entity adversarial swap from the original paper, because it requires - access to the full dataset and ground truth labels in advance. + Words are prioritized for a synonym-swap transformation based on a + combination of their saliency score and maximum word-swap + effectiveness. Note that this implementation does not include the + Named Entity adversarial swap from the original paper, because it + requires access to the full dataset and ground truth labels in + advance. https://www.aclweb.org/anthology/P19-1103/ """ diff --git a/textattack/attack_recipes/textfooler_jin_2019.py b/textattack/attack_recipes/textfooler_jin_2019.py index 1181b3a4a..9c635828f 100644 --- a/textattack/attack_recipes/textfooler_jin_2019.py +++ b/textattack/attack_recipes/textfooler_jin_2019.py @@ -25,7 +25,8 @@ class TextFoolerJin2019(AttackRecipe): """Jin, D., Jin, Z., Zhou, J.T., & Szolovits, P. (2019). - Is BERT Really Robust? Natural Language Attack on Text Classification and Entailment. + Is BERT Really Robust? Natural Language Attack on Text + Classification and Entailment. https://arxiv.org/abs/1907.11932 """ diff --git a/textattack/augmentation/recipes.py b/textattack/augmentation/recipes.py index e407fe7f2..fe647d9d9 100644 --- a/textattack/augmentation/recipes.py +++ b/textattack/augmentation/recipes.py @@ -190,7 +190,8 @@ def __init__(self, **kwargs): class CLAREAugmenter(Augmenter): """Li, Zhang, Peng, Chen, Brockett, Sun, Dolan. - "Contextualized Perturbation for Textual Adversarial Attack" (Li et al., 2020) + "Contextualized Perturbation for Textual Adversarial Attack" (Li et + al., 2020) https://arxiv.org/abs/2009.07502 diff --git a/textattack/constraints/grammaticality/language_models/google_language_model/lm_data_utils.py b/textattack/constraints/grammaticality/language_models/google_language_model/lm_data_utils.py index 2f2f7199b..6ab38d956 100644 --- a/textattack/constraints/grammaticality/language_models/google_language_model/lm_data_utils.py +++ b/textattack/constraints/grammaticality/language_models/google_language_model/lm_data_utils.py @@ -12,8 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== - - """ A library for loading 1B word benchmark dataset. ------------------------------------------------ diff --git a/textattack/constraints/grammaticality/language_models/learning_to_write/learning_to_write.py b/textattack/constraints/grammaticality/language_models/learning_to_write/learning_to_write.py index 5da6e59ba..9fb9f9bb0 100644 --- a/textattack/constraints/grammaticality/language_models/learning_to_write/learning_to_write.py +++ b/textattack/constraints/grammaticality/language_models/learning_to_write/learning_to_write.py @@ -17,17 +17,16 @@ class LearningToWriteLanguageModel(LanguageModelConstraint): """A constraint based on the L2W language model. - The RNN-based language model from "Learning to Write With Cooperative - Discriminators" (Holtzman et al, 2018). + The RNN-based language model from "Learning to Write With + Cooperative Discriminators" (Holtzman et al, 2018). https://arxiv.org/pdf/1805.06087.pdf https://github.com/windweller/l2w - - Reused by Jia et al., 2019, as a substitution for the Google 1-billion - words language model (in a revised version the attack of Alzantot et - al., 2018). + Reused by Jia et al., 2019, as a substitution for the Google + 1-billion words language model (in a revised version the attack of + Alzantot et al., 2018). https://worksheets.codalab.org/worksheets/0x79feda5f1998497db75422eca8fcd689 """ diff --git a/textattack/constraints/pre_transformation/min_word_length.py b/textattack/constraints/pre_transformation/min_word_length.py index 991a67539..0f965ea6a 100644 --- a/textattack/constraints/pre_transformation/min_word_length.py +++ b/textattack/constraints/pre_transformation/min_word_length.py @@ -12,7 +12,8 @@ class MinWordLength(PreTransformationConstraint): """A constraint that prevents modifications to words less than a certain word character-length. - :param min_length: Minimum word character-length needed for changes to be made to a word. + :param min_length: Minimum word character-length needed for changes + to be made to a word. """ def __init__(self, min_length): diff --git a/textattack/constraints/semantics/sentence_encoders/bert/bert.py b/textattack/constraints/semantics/sentence_encoders/bert/bert.py index cbbc8c426..f409fdc44 100644 --- a/textattack/constraints/semantics/sentence_encoders/bert/bert.py +++ b/textattack/constraints/semantics/sentence_encoders/bert/bert.py @@ -15,7 +15,9 @@ class BERT(SentenceEncoder): """Constraint using similarity between sentence encodings of x and x_adv where the text embeddings are created using BERT, trained on NLI data, and fine- tuned on the STS benchmark dataset. - Available models can be found here: https://huggingface.co/sentence-transformers""" + + Available models can be found here: https://huggingface.co/sentence-transformers + """ def __init__( self, diff --git a/textattack/metrics/metric.py b/textattack/metrics/metric.py index 015046c62..3e221dc2f 100644 --- a/textattack/metrics/metric.py +++ b/textattack/metrics/metric.py @@ -8,7 +8,7 @@ class Metric(ABC): - """A metric for evaluating Adversarial Attack candidates.""" + """A metric for evaluating results and data quality.""" @abstractmethod def __init__(self, **kwargs): diff --git a/textattack/metrics/recipe.py b/textattack/metrics/recipe.py new file mode 100644 index 000000000..4e3fda966 --- /dev/null +++ b/textattack/metrics/recipe.py @@ -0,0 +1,30 @@ +""" +Attack Metric Quality Recipes: +============================== + +""" +import random + +from .metric import Metric + + +class AdvancedAttackMetric(Metric): + """Calculate a suite of advanced metrics to evaluate attackResults' + quality.""" + + def __init__(self, choices=["use"]): + self.achoices = choices + + def calculate(self, results): + advanced_metrics = {} + if "use" in self.achoices: + advanced_metrics["use"] = USEMetric().calculate(results) + if "perplexity" in self.achoices: + advanced_metrics["perplexity"] = Perplexity().calculate(results) + if "bert_score" in self.achoices: + advanced_metrics["bert_score"] = BERTScoreMetric().calculate(results) + if "meteor_score" in self.achoices: + advanced_metrics["meteor_score"] = MeteorMetric().calculate(results) + if "sbert_score" in self.achoices: + advanced_metrics["sbert_score"] = SBERTMetric().calculate(results) + return advanced_metrics diff --git a/textattack/models/helpers/glove_embedding_layer.py b/textattack/models/helpers/glove_embedding_layer.py index 2d1a5083b..5a3e2b907 100644 --- a/textattack/models/helpers/glove_embedding_layer.py +++ b/textattack/models/helpers/glove_embedding_layer.py @@ -16,9 +16,9 @@ class EmbeddingLayer(nn.Module): """A layer of a model that replaces word IDs with their embeddings. - This is a useful abstraction for any nn.module which wants to take word IDs - (a sequence of text) as input layer but actually manipulate words' - embeddings. + This is a useful abstraction for any nn.module which wants to take + word IDs (a sequence of text) as input layer but actually manipulate + words' embeddings. Requires some pre-trained embedding with associated word IDs. """ diff --git a/textattack/models/wrappers/model_wrapper.py b/textattack/models/wrappers/model_wrapper.py index c94419463..f3ab13ccd 100644 --- a/textattack/models/wrappers/model_wrapper.py +++ b/textattack/models/wrappers/model_wrapper.py @@ -10,11 +10,12 @@ class ModelWrapper(ABC): """A model wrapper queries a model with a list of text inputs. - Classification-based models return a list of lists, where each sublist - represents the model's scores for a given input. + Classification-based models return a list of lists, where each + sublist represents the model's scores for a given input. - Text-to-text models return a list of strings, where each string is the - output – like a translation or summarization – for a given input. + Text-to-text models return a list of strings, where each string is + the output – like a translation or summarization – for a given + input. """ @abstractmethod diff --git a/textattack/shared/attacked_text.py b/textattack/shared/attacked_text.py index 4616b467e..c82dd86a8 100644 --- a/textattack/shared/attacked_text.py +++ b/textattack/shared/attacked_text.py @@ -25,7 +25,6 @@ class AttackedText: - """A helper class that represents a string that can be attacked. Models that take multiple sentences as input separate them by ``SPLIT_TOKEN``. diff --git a/textattack/shared/utils/strings.py b/textattack/shared/utils/strings.py index 7b137d174..4a7bff61d 100644 --- a/textattack/shared/utils/strings.py +++ b/textattack/shared/utils/strings.py @@ -86,7 +86,7 @@ def __repr__(self): __str__ = __repr__ def extra_repr_keys(self): - """extra fields to be included in the representation of a class.""" + """Extra fields to be included in the representation of a class.""" return [] @@ -164,7 +164,7 @@ class ANSI_ESCAPE_CODES: FAIL = "\033[91m" BOLD = "\033[1m" UNDERLINE = "\033[4m" - """ This color stops the current color sequence. """ + """This color stops the current color sequence.""" STOP = "\033[0m" diff --git a/textattack/transformations/word_swaps/word_swap_change_number.py b/textattack/transformations/word_swaps/word_swap_change_number.py index b885b6fa4..d30df1cfe 100644 --- a/textattack/transformations/word_swaps/word_swap_change_number.py +++ b/textattack/transformations/word_swaps/word_swap_change_number.py @@ -100,7 +100,7 @@ def _get_new_number(self, word): return [] def _alter_number(self, num): - """helper function of _get_new_number, replace a number with another + """Helper function of _get_new_number, replace a number with another random number within the range of self.max_change.""" if num not in [0, 2, 4]: change = int(num * self.max_change) + 1 From 5b66ec232dc6921d5e7ffad815c88eb7c1a7f1a2 Mon Sep 17 00:00:00 2001 From: Yanjun Qi Date: Sat, 30 Sep 2023 13:25:09 -0400 Subject: [PATCH 3/5] minor format change.. also fixed metric recipe error --- tests/test_attacked_text.py | 2 +- tests/test_metric_api.py | 43 ++++++++++++++++--- tests/test_word_embedding.py | 4 +- textattack/attack.py | 4 +- textattack/attack_args.py | 4 +- textattack/constraints/grammaticality/cola.py | 2 +- .../google_language_model/alzantot_goog_lm.py | 2 +- .../grammaticality/part_of_speech.py | 2 +- .../sentence_encoders/thought_vector.py | 2 +- textattack/goal_functions/goal_function.py | 2 +- .../goal_functions/text/minimize_bleu.py | 2 +- .../text/non_overlapping_output.py | 4 +- .../metrics/attack_metrics/words_perturbed.py | 2 +- textattack/metrics/recipe.py | 16 ++++--- textattack/shared/validators.py | 5 +-- 15 files changed, 64 insertions(+), 32 deletions(-) diff --git a/tests/test_attacked_text.py b/tests/test_attacked_text.py index 6aff12fbc..50bdf86b4 100644 --- a/tests/test_attacked_text.py +++ b/tests/test_attacked_text.py @@ -70,7 +70,7 @@ def test_window_around_index(self, attacked_text): def test_big_window_around_index(self, attacked_text): assert ( - attacked_text.text_window_around_index(0, 10**5) + "." + attacked_text.text_window_around_index(0, 10 ** 5) + "." ) == attacked_text.text def test_window_around_index_start(self, attacked_text): diff --git a/tests/test_metric_api.py b/tests/test_metric_api.py index 519139a1a..77c9ed589 100644 --- a/tests/test_metric_api.py +++ b/tests/test_metric_api.py @@ -30,8 +30,7 @@ def test_use(): from textattack import AttackArgs, Attacker from textattack.attack_recipes import DeepWordBugGao2018 from textattack.datasets import HuggingFaceDataset - from textattack.metrics.quality_metrics import USEMetric - from textattack.metrics.recipe import AdvancedAttackMetric + from textattack.metrics.quality_metrics import MeteorMetric from textattack.models.wrappers import HuggingFaceModelWrapper model = transformers.AutoModelForSequenceClassification.from_pretrained( @@ -51,12 +50,42 @@ def test_use(): disable_stdout=True, ) attacker = Attacker(attack, dataset, attack_args) - results = attacker.attack_dataset() - usem = USEMetric().calculate(results) + usem = MeteorMetric().calculate(results) + + assert usem["avg_attack_meteor_score"] == 0.71 + - assert usem["avg_attack_use_score"] == 0.76 +def test_metric_recipe(): + + import transformers + + from textattack import AttackArgs, Attacker + from textattack.attack_recipes import DeepWordBugGao2018 + from textattack.datasets import HuggingFaceDataset + from textattack.metrics.quality_metrics import USEMetric + from textattack.metrics.recipe import AdvancedAttackMetric + from textattack.models.wrappers import HuggingFaceModelWrapper + + model = transformers.AutoModelForSequenceClassification.from_pretrained( + "distilbert-base-uncased-finetuned-sst-2-english" + ) + tokenizer = transformers.AutoTokenizer.from_pretrained( + "distilbert-base-uncased-finetuned-sst-2-english" + ) + model_wrapper = HuggingFaceModelWrapper(model, tokenizer) + attack = DeepWordBugGao2018.build(model_wrapper) + dataset = HuggingFaceDataset("glue", "sst2", split="train") + attack_args = AttackArgs( + num_examples=1, + log_to_csv="log.csv", + checkpoint_interval=5, + checkpoint_dir="checkpoints", + disable_stdout=True, + ) + attacker = Attacker(attack, dataset, attack_args) + results = attacker.attack_dataset() - adv_score = AdvancedAttackMetric(["use", "perplexity"]).calculate(results) - assert adv_score["use"]["avg_attack_use_score"] == 0.76 + adv_score = AdvancedAttackMetric(["meteor_score", "perplexity"]).calculate(results) + assert adv_score["avg_attack_meteor_score"] == 0.71 diff --git a/tests/test_word_embedding.py b/tests/test_word_embedding.py index 4772c27dd..5232e8fa1 100644 --- a/tests/test_word_embedding.py +++ b/tests/test_word_embedding.py @@ -10,7 +10,7 @@ def test_embedding_paragramcf(): word_embedding = WordEmbedding.counterfitted_GLOVE_embedding() assert pytest.approx(word_embedding[0][0]) == -0.022007 assert pytest.approx(word_embedding["fawn"][0]) == -0.022007 - assert word_embedding[10**9] is None + assert word_embedding[10 ** 9] is None def test_embedding_gensim(): @@ -37,7 +37,7 @@ def test_embedding_gensim(): word_embedding = GensimWordEmbedding(keyed_vectors) assert pytest.approx(word_embedding[0][0]) == 1 assert pytest.approx(word_embedding["bye-bye"][0]) == -1 / np.sqrt(2) - assert word_embedding[10**9] is None + assert word_embedding[10 ** 9] is None # test query functionality assert pytest.approx(word_embedding.get_cos_sim(1, 3)) == 0 diff --git a/textattack/attack.py b/textattack/attack.py index 47537d1b0..7743817ab 100644 --- a/textattack/attack.py +++ b/textattack/attack.py @@ -83,8 +83,8 @@ def __init__( constraints: List[Union[Constraint, PreTransformationConstraint]], transformation: Transformation, search_method: SearchMethod, - transformation_cache_size=2**15, - constraint_cache_size=2**15, + transformation_cache_size=2 ** 15, + constraint_cache_size=2 ** 15, ): """Initialize an attack object. diff --git a/textattack/attack_args.py b/textattack/attack_args.py index 38a7bd25d..f47060c9f 100644 --- a/textattack/attack_args.py +++ b/textattack/attack_args.py @@ -507,8 +507,8 @@ class _CommandLineAttackArgs: interactive: bool = False parallel: bool = False model_batch_size: int = 32 - model_cache_size: int = 2**18 - constraint_cache_size: int = 2**18 + model_cache_size: int = 2 ** 18 + constraint_cache_size: int = 2 ** 18 @classmethod def _add_parser_args(cls, parser): diff --git a/textattack/constraints/grammaticality/cola.py b/textattack/constraints/grammaticality/cola.py index 190bad25c..beb7c30a6 100644 --- a/textattack/constraints/grammaticality/cola.py +++ b/textattack/constraints/grammaticality/cola.py @@ -43,7 +43,7 @@ def __init__( self.max_diff = max_diff self.model_name = model_name - self._reference_score_cache = lru.LRU(2**10) + self._reference_score_cache = lru.LRU(2 ** 10) model = AutoModelForSequenceClassification.from_pretrained(model_name) tokenizer = AutoTokenizer.from_pretrained(model_name) self.model = HuggingFaceModelWrapper(model, tokenizer) diff --git a/textattack/constraints/grammaticality/language_models/google_language_model/alzantot_goog_lm.py b/textattack/constraints/grammaticality/language_models/google_language_model/alzantot_goog_lm.py index 005dda55e..d47bfd6ec 100644 --- a/textattack/constraints/grammaticality/language_models/google_language_model/alzantot_goog_lm.py +++ b/textattack/constraints/grammaticality/language_models/google_language_model/alzantot_goog_lm.py @@ -49,7 +49,7 @@ def __init__(self): self.sess, self.graph, self.PBTXT_PATH, self.CKPT_PATH ) - self.lm_cache = lru.LRU(2**18) + self.lm_cache = lru.LRU(2 ** 18) def clear_cache(self): self.lm_cache.clear() diff --git a/textattack/constraints/grammaticality/part_of_speech.py b/textattack/constraints/grammaticality/part_of_speech.py index f531f33c7..e686efdb1 100644 --- a/textattack/constraints/grammaticality/part_of_speech.py +++ b/textattack/constraints/grammaticality/part_of_speech.py @@ -56,7 +56,7 @@ def __init__( self.language_nltk = language_nltk self.language_stanza = language_stanza - self._pos_tag_cache = lru.LRU(2**14) + self._pos_tag_cache = lru.LRU(2 ** 14) if tagger_type == "flair": if tagset == "universal": self._flair_pos_tagger = SequenceTagger.load("upos-fast") diff --git a/textattack/constraints/semantics/sentence_encoders/thought_vector.py b/textattack/constraints/semantics/sentence_encoders/thought_vector.py index 4a7978b01..60bac23ba 100644 --- a/textattack/constraints/semantics/sentence_encoders/thought_vector.py +++ b/textattack/constraints/semantics/sentence_encoders/thought_vector.py @@ -32,7 +32,7 @@ def __init__(self, embedding=None, **kwargs): def clear_cache(self): self._get_thought_vector.cache_clear() - @functools.lru_cache(maxsize=2**10) + @functools.lru_cache(maxsize=2 ** 10) def _get_thought_vector(self, text): """Sums the embeddings of all the words in ``text`` into a "thought vector".""" diff --git a/textattack/goal_functions/goal_function.py b/textattack/goal_functions/goal_function.py index 5d51bdf05..7fa6a7c3e 100644 --- a/textattack/goal_functions/goal_function.py +++ b/textattack/goal_functions/goal_function.py @@ -40,7 +40,7 @@ def __init__( use_cache=True, query_budget=float("inf"), model_batch_size=32, - model_cache_size=2**20, + model_cache_size=2 ** 20, ): validators.validate_model_goal_function_compatibility( self.__class__, model_wrapper.model.__class__ diff --git a/textattack/goal_functions/text/minimize_bleu.py b/textattack/goal_functions/text/minimize_bleu.py index 92613be5a..339995772 100644 --- a/textattack/goal_functions/text/minimize_bleu.py +++ b/textattack/goal_functions/text/minimize_bleu.py @@ -59,7 +59,7 @@ def extra_repr_keys(self): return ["maximizable", "target_bleu"] -@functools.lru_cache(maxsize=2**12) +@functools.lru_cache(maxsize=2 ** 12) def get_bleu(a, b): ref = a.words hyp = b.words diff --git a/textattack/goal_functions/text/non_overlapping_output.py b/textattack/goal_functions/text/non_overlapping_output.py index e2cb49820..443aa2366 100644 --- a/textattack/goal_functions/text/non_overlapping_output.py +++ b/textattack/goal_functions/text/non_overlapping_output.py @@ -38,12 +38,12 @@ def _get_score(self, model_output, _): return num_words_diff / len(get_words_cached(self.ground_truth_output)) -@functools.lru_cache(maxsize=2**12) +@functools.lru_cache(maxsize=2 ** 12) def get_words_cached(s): return np.array(words_from_text(s)) -@functools.lru_cache(maxsize=2**12) +@functools.lru_cache(maxsize=2 ** 12) def word_difference_score(s1, s2): """Returns the number of words that are non-overlapping between s1 and s2.""" diff --git a/textattack/metrics/attack_metrics/words_perturbed.py b/textattack/metrics/attack_metrics/words_perturbed.py index 6104de1b3..d4b128241 100644 --- a/textattack/metrics/attack_metrics/words_perturbed.py +++ b/textattack/metrics/attack_metrics/words_perturbed.py @@ -31,7 +31,7 @@ def calculate(self, results): self.total_attacks = len(self.results) self.all_num_words = np.zeros(len(self.results)) self.perturbed_word_percentages = np.zeros(len(self.results)) - self.num_words_changed_until_success = np.zeros(2**16) + self.num_words_changed_until_success = np.zeros(2 ** 16) self.max_words_changed = 0 for i, result in enumerate(self.results): diff --git a/textattack/metrics/recipe.py b/textattack/metrics/recipe.py index 4e3fda966..68bf0be44 100644 --- a/textattack/metrics/recipe.py +++ b/textattack/metrics/recipe.py @@ -5,6 +5,12 @@ """ import random +from textattack.metrics.quality_metrics.bert_score import BERTScoreMetric +from textattack.metrics.quality_metrics.meteor_score import MeteorMetric +from textattack.metrics.quality_metrics.perplexity import Perplexity +from textattack.metrics.quality_metrics.sentence_bert import SBERTMetric +from textattack.metrics.quality_metrics.use import USEMetric + from .metric import Metric @@ -18,13 +24,13 @@ def __init__(self, choices=["use"]): def calculate(self, results): advanced_metrics = {} if "use" in self.achoices: - advanced_metrics["use"] = USEMetric().calculate(results) + advanced_metrics.update(USEMetric().calculate(results)) if "perplexity" in self.achoices: - advanced_metrics["perplexity"] = Perplexity().calculate(results) + advanced_metrics.update(Perplexity().calculate(results)) if "bert_score" in self.achoices: - advanced_metrics["bert_score"] = BERTScoreMetric().calculate(results) + advanced_metrics.update(BERTScoreMetric().calculate(results)) if "meteor_score" in self.achoices: - advanced_metrics["meteor_score"] = MeteorMetric().calculate(results) + advanced_metrics.update(MeteorMetric().calculate(results)) if "sbert_score" in self.achoices: - advanced_metrics["sbert_score"] = SBERTMetric().calculate(results) + advanced_metrics.update(SBERTMetric().calculate(results)) return advanced_metrics diff --git a/textattack/shared/validators.py b/textattack/shared/validators.py index 45513a2a3..55f4ed08c 100644 --- a/textattack/shared/validators.py +++ b/textattack/shared/validators.py @@ -25,10 +25,7 @@ r"^textattack.models.helpers.word_cnn_for_classification.*", r"^transformers.modeling_\w*\.\w*ForSequenceClassification$", ], - ( - NonOverlappingOutput, - MinimizeBleu, - ): [ + (NonOverlappingOutput, MinimizeBleu,): [ r"^textattack.models.helpers.t5_for_text_to_text.*", ], } From 0f6ee867dc6936465c881d115d681f765432373f Mon Sep 17 00:00:00 2001 From: Yanjun Qi Date: Sat, 30 Sep 2023 13:34:49 -0400 Subject: [PATCH 4/5] black updates .. then re-make format --- tests/test_attacked_text.py | 2 +- tests/test_metric_api.py | 1 - tests/test_word_embedding.py | 4 ++-- textattack/attack.py | 4 ++-- textattack/attack_args.py | 4 ++-- textattack/constraints/grammaticality/cola.py | 2 +- .../google_language_model/alzantot_goog_lm.py | 2 +- textattack/constraints/grammaticality/part_of_speech.py | 2 +- .../semantics/sentence_encoders/thought_vector.py | 2 +- textattack/goal_functions/goal_function.py | 2 +- textattack/goal_functions/text/minimize_bleu.py | 2 +- textattack/goal_functions/text/non_overlapping_output.py | 4 ++-- textattack/metrics/attack_metrics/words_perturbed.py | 2 +- textattack/shared/validators.py | 5 ++++- 14 files changed, 20 insertions(+), 18 deletions(-) diff --git a/tests/test_attacked_text.py b/tests/test_attacked_text.py index 50bdf86b4..6aff12fbc 100644 --- a/tests/test_attacked_text.py +++ b/tests/test_attacked_text.py @@ -70,7 +70,7 @@ def test_window_around_index(self, attacked_text): def test_big_window_around_index(self, attacked_text): assert ( - attacked_text.text_window_around_index(0, 10 ** 5) + "." + attacked_text.text_window_around_index(0, 10**5) + "." ) == attacked_text.text def test_window_around_index_start(self, attacked_text): diff --git a/tests/test_metric_api.py b/tests/test_metric_api.py index 77c9ed589..49836e107 100644 --- a/tests/test_metric_api.py +++ b/tests/test_metric_api.py @@ -58,7 +58,6 @@ def test_use(): def test_metric_recipe(): - import transformers from textattack import AttackArgs, Attacker diff --git a/tests/test_word_embedding.py b/tests/test_word_embedding.py index 5232e8fa1..4772c27dd 100644 --- a/tests/test_word_embedding.py +++ b/tests/test_word_embedding.py @@ -10,7 +10,7 @@ def test_embedding_paragramcf(): word_embedding = WordEmbedding.counterfitted_GLOVE_embedding() assert pytest.approx(word_embedding[0][0]) == -0.022007 assert pytest.approx(word_embedding["fawn"][0]) == -0.022007 - assert word_embedding[10 ** 9] is None + assert word_embedding[10**9] is None def test_embedding_gensim(): @@ -37,7 +37,7 @@ def test_embedding_gensim(): word_embedding = GensimWordEmbedding(keyed_vectors) assert pytest.approx(word_embedding[0][0]) == 1 assert pytest.approx(word_embedding["bye-bye"][0]) == -1 / np.sqrt(2) - assert word_embedding[10 ** 9] is None + assert word_embedding[10**9] is None # test query functionality assert pytest.approx(word_embedding.get_cos_sim(1, 3)) == 0 diff --git a/textattack/attack.py b/textattack/attack.py index 7743817ab..47537d1b0 100644 --- a/textattack/attack.py +++ b/textattack/attack.py @@ -83,8 +83,8 @@ def __init__( constraints: List[Union[Constraint, PreTransformationConstraint]], transformation: Transformation, search_method: SearchMethod, - transformation_cache_size=2 ** 15, - constraint_cache_size=2 ** 15, + transformation_cache_size=2**15, + constraint_cache_size=2**15, ): """Initialize an attack object. diff --git a/textattack/attack_args.py b/textattack/attack_args.py index f47060c9f..38a7bd25d 100644 --- a/textattack/attack_args.py +++ b/textattack/attack_args.py @@ -507,8 +507,8 @@ class _CommandLineAttackArgs: interactive: bool = False parallel: bool = False model_batch_size: int = 32 - model_cache_size: int = 2 ** 18 - constraint_cache_size: int = 2 ** 18 + model_cache_size: int = 2**18 + constraint_cache_size: int = 2**18 @classmethod def _add_parser_args(cls, parser): diff --git a/textattack/constraints/grammaticality/cola.py b/textattack/constraints/grammaticality/cola.py index beb7c30a6..190bad25c 100644 --- a/textattack/constraints/grammaticality/cola.py +++ b/textattack/constraints/grammaticality/cola.py @@ -43,7 +43,7 @@ def __init__( self.max_diff = max_diff self.model_name = model_name - self._reference_score_cache = lru.LRU(2 ** 10) + self._reference_score_cache = lru.LRU(2**10) model = AutoModelForSequenceClassification.from_pretrained(model_name) tokenizer = AutoTokenizer.from_pretrained(model_name) self.model = HuggingFaceModelWrapper(model, tokenizer) diff --git a/textattack/constraints/grammaticality/language_models/google_language_model/alzantot_goog_lm.py b/textattack/constraints/grammaticality/language_models/google_language_model/alzantot_goog_lm.py index d47bfd6ec..005dda55e 100644 --- a/textattack/constraints/grammaticality/language_models/google_language_model/alzantot_goog_lm.py +++ b/textattack/constraints/grammaticality/language_models/google_language_model/alzantot_goog_lm.py @@ -49,7 +49,7 @@ def __init__(self): self.sess, self.graph, self.PBTXT_PATH, self.CKPT_PATH ) - self.lm_cache = lru.LRU(2 ** 18) + self.lm_cache = lru.LRU(2**18) def clear_cache(self): self.lm_cache.clear() diff --git a/textattack/constraints/grammaticality/part_of_speech.py b/textattack/constraints/grammaticality/part_of_speech.py index e686efdb1..f531f33c7 100644 --- a/textattack/constraints/grammaticality/part_of_speech.py +++ b/textattack/constraints/grammaticality/part_of_speech.py @@ -56,7 +56,7 @@ def __init__( self.language_nltk = language_nltk self.language_stanza = language_stanza - self._pos_tag_cache = lru.LRU(2 ** 14) + self._pos_tag_cache = lru.LRU(2**14) if tagger_type == "flair": if tagset == "universal": self._flair_pos_tagger = SequenceTagger.load("upos-fast") diff --git a/textattack/constraints/semantics/sentence_encoders/thought_vector.py b/textattack/constraints/semantics/sentence_encoders/thought_vector.py index 60bac23ba..4a7978b01 100644 --- a/textattack/constraints/semantics/sentence_encoders/thought_vector.py +++ b/textattack/constraints/semantics/sentence_encoders/thought_vector.py @@ -32,7 +32,7 @@ def __init__(self, embedding=None, **kwargs): def clear_cache(self): self._get_thought_vector.cache_clear() - @functools.lru_cache(maxsize=2 ** 10) + @functools.lru_cache(maxsize=2**10) def _get_thought_vector(self, text): """Sums the embeddings of all the words in ``text`` into a "thought vector".""" diff --git a/textattack/goal_functions/goal_function.py b/textattack/goal_functions/goal_function.py index 7fa6a7c3e..5d51bdf05 100644 --- a/textattack/goal_functions/goal_function.py +++ b/textattack/goal_functions/goal_function.py @@ -40,7 +40,7 @@ def __init__( use_cache=True, query_budget=float("inf"), model_batch_size=32, - model_cache_size=2 ** 20, + model_cache_size=2**20, ): validators.validate_model_goal_function_compatibility( self.__class__, model_wrapper.model.__class__ diff --git a/textattack/goal_functions/text/minimize_bleu.py b/textattack/goal_functions/text/minimize_bleu.py index 339995772..92613be5a 100644 --- a/textattack/goal_functions/text/minimize_bleu.py +++ b/textattack/goal_functions/text/minimize_bleu.py @@ -59,7 +59,7 @@ def extra_repr_keys(self): return ["maximizable", "target_bleu"] -@functools.lru_cache(maxsize=2 ** 12) +@functools.lru_cache(maxsize=2**12) def get_bleu(a, b): ref = a.words hyp = b.words diff --git a/textattack/goal_functions/text/non_overlapping_output.py b/textattack/goal_functions/text/non_overlapping_output.py index 443aa2366..e2cb49820 100644 --- a/textattack/goal_functions/text/non_overlapping_output.py +++ b/textattack/goal_functions/text/non_overlapping_output.py @@ -38,12 +38,12 @@ def _get_score(self, model_output, _): return num_words_diff / len(get_words_cached(self.ground_truth_output)) -@functools.lru_cache(maxsize=2 ** 12) +@functools.lru_cache(maxsize=2**12) def get_words_cached(s): return np.array(words_from_text(s)) -@functools.lru_cache(maxsize=2 ** 12) +@functools.lru_cache(maxsize=2**12) def word_difference_score(s1, s2): """Returns the number of words that are non-overlapping between s1 and s2.""" diff --git a/textattack/metrics/attack_metrics/words_perturbed.py b/textattack/metrics/attack_metrics/words_perturbed.py index d4b128241..6104de1b3 100644 --- a/textattack/metrics/attack_metrics/words_perturbed.py +++ b/textattack/metrics/attack_metrics/words_perturbed.py @@ -31,7 +31,7 @@ def calculate(self, results): self.total_attacks = len(self.results) self.all_num_words = np.zeros(len(self.results)) self.perturbed_word_percentages = np.zeros(len(self.results)) - self.num_words_changed_until_success = np.zeros(2 ** 16) + self.num_words_changed_until_success = np.zeros(2**16) self.max_words_changed = 0 for i, result in enumerate(self.results): diff --git a/textattack/shared/validators.py b/textattack/shared/validators.py index 55f4ed08c..45513a2a3 100644 --- a/textattack/shared/validators.py +++ b/textattack/shared/validators.py @@ -25,7 +25,10 @@ r"^textattack.models.helpers.word_cnn_for_classification.*", r"^transformers.modeling_\w*\.\w*ForSequenceClassification$", ], - (NonOverlappingOutput, MinimizeBleu,): [ + ( + NonOverlappingOutput, + MinimizeBleu, + ): [ r"^textattack.models.helpers.t5_for_text_to_text.*", ], } From 6582fcfd21d11b4ae2da6965a763c0b0330b988b Mon Sep 17 00:00:00 2001 From: Yanjun Qi Date: Sat, 30 Sep 2023 13:47:07 -0400 Subject: [PATCH 5/5] remove two unneeded imports --- tests/test_metric_api.py | 1 - textattack/metrics/recipe.py | 1 - 2 files changed, 2 deletions(-) diff --git a/tests/test_metric_api.py b/tests/test_metric_api.py index 49836e107..aaff527a8 100644 --- a/tests/test_metric_api.py +++ b/tests/test_metric_api.py @@ -63,7 +63,6 @@ def test_metric_recipe(): from textattack import AttackArgs, Attacker from textattack.attack_recipes import DeepWordBugGao2018 from textattack.datasets import HuggingFaceDataset - from textattack.metrics.quality_metrics import USEMetric from textattack.metrics.recipe import AdvancedAttackMetric from textattack.models.wrappers import HuggingFaceModelWrapper diff --git a/textattack/metrics/recipe.py b/textattack/metrics/recipe.py index 68bf0be44..304bd5384 100644 --- a/textattack/metrics/recipe.py +++ b/textattack/metrics/recipe.py @@ -3,7 +3,6 @@ ============================== """ -import random from textattack.metrics.quality_metrics.bert_score import BERTScoreMetric from textattack.metrics.quality_metrics.meteor_score import MeteorMetric