From 4854eb73c53c568e74ff857b0f643dad71a5e1cf Mon Sep 17 00:00:00 2001
From: diegoc
Date: Wed, 13 Apr 2022 14:57:22 -0400
Subject: [PATCH 1/2] hard label classification

---
 textattack/attack_args.py                      |  7 +---
 .../hardlabel_classification.py                | 39 +++++++++++++++++++
 2 files changed, 41 insertions(+), 5 deletions(-)
 create mode 100644 textattack/goal_functions/classification/hardlabel_classification.py

diff --git a/textattack/attack_args.py b/textattack/attack_args.py
index c3724141c..d1f19dfd2 100644
--- a/textattack/attack_args.py
+++ b/textattack/attack_args.py
@@ -111,6 +111,7 @@
     #
     # Classification goal functions
    #
+    "hardlabel-classification": "textattack.goal_functions.classification.HardLabelClassification",
     "targeted-classification": "textattack.goal_functions.classification.TargetedClassification",
     "untargeted-classification": "textattack.goal_functions.classification.UntargetedClassification",
     "input-reduction": "textattack.goal_functions.classification.InputReduction",
@@ -126,7 +127,6 @@
 @dataclass
 class AttackArgs:
     """Attack arguments to be passed to :class:`~textattack.Attacker`.
-
     Args:
         num_examples (:obj:`int`, 'optional`, defaults to :obj:`10`):
             The number of examples to attack. :obj:`-1` for entire dataset.
@@ -134,7 +134,6 @@ class AttackArgs:
             The number of successful adversarial examples we want. This is different from :obj:`num_examples`
             as :obj:`num_examples` only cares about attacking `N` samples while :obj:`num_successful_examples` aims to keep attacking
             until we have `N` successful cases.
-
             .. note::
                 If set, this argument overrides `num_examples` argument.
         num_examples_offset (:obj: `int`, `optional`, defaults to :obj:`0`):
@@ -148,7 +147,6 @@ class AttackArgs:
         query_budget (:obj:`int`, `optional`, defaults to :obj:`None`):
             The maximum number of model queries allowed per example attacked. If not set, we use the query budget set
             in the :class:`~textattack.goal_functions.GoalFunction` object (which by default is :obj:`float("inf")`).
-
             .. note::
                 Setting this overwrites the query budget set in :class:`~textattack.goal_functions.GoalFunction` object.
         checkpoint_interval (:obj:`int`, `optional`, defaults to :obj:`None`):
@@ -439,7 +437,6 @@ def create_loggers_from_args(cls, args):
 class _CommandLineAttackArgs:
     """Attack args for command line execution. This requires more arguments to
     create ``Attack`` object as specified.
-
     Args:
         transformation (:obj:`str`, `optional`, defaults to :obj:`"word-swap-embedding"`):
             Name of transformation to use.
@@ -731,4 +728,4 @@ def _add_parser_args(cls, parser):
         parser = DatasetArgs._add_parser_args(parser)
         parser = _CommandLineAttackArgs._add_parser_args(parser)
         parser = AttackArgs._add_parser_args(parser)
-        return parser
+        return parser
\ No newline at end of file

diff --git a/textattack/goal_functions/classification/hardlabel_classification.py b/textattack/goal_functions/classification/hardlabel_classification.py
new file mode 100644
index 000000000..f237bc1e1
--- /dev/null
+++ b/textattack/goal_functions/classification/hardlabel_classification.py
@@ -0,0 +1,39 @@
+"""
+Determine if an attack has been successful in Hard Label Classification.
+----------------------------------------------------
+"""
+
+
+from .classification_goal_function import ClassificationGoalFunction
+
+
+class HardLabelClassification(ClassificationGoalFunction):
+    """A hard-label attack on classification models which attempts to maximize
+    the semantic similarity of the perturbed text to the original input while keeping the prediction outside the original class's decision boundary.
+    Args:
+        target_max_score (float): If set, goal is to reduce model output to
+            below this score. Otherwise, goal is to change the overall predicted
+            class.
+    """
+
+    def __init__(self, *args, target_max_score=None, **kwargs):
+        self.target_max_score = target_max_score
+        super().__init__(*args, **kwargs)
+
+    def _is_goal_complete(self, model_output, _):
+        if self.target_max_score is not None:
+            return model_output[self.ground_truth_output] < self.target_max_score
+        elif (model_output.numel() == 1) and isinstance(
+            self.ground_truth_output, float
+        ):
+            return abs(self.ground_truth_output - model_output.item()) >= 0.5
+        else:
+            return model_output.argmax() != self.ground_truth_output
+
+    def _get_score(self, model_output, _):
+        # If the model outputs a single number and the ground truth output is
+        # a float, we assume that this is a regression task.
+        if (model_output.numel() == 1) and isinstance(self.ground_truth_output, float):
+            return max(model_output.item(), self.ground_truth_output)
+        else:
+            return 1 - model_output[self.ground_truth_output]
\ No newline at end of file

From 87c4671d4ab26830f8a354d14273f2fdef87d00c Mon Sep 17 00:00:00 2001
From: Yanjun Qi
Date: Mon, 11 Sep 2023 00:38:07 -0400
Subject: [PATCH 2/2] make format

---
 textattack/attack_args.py                          | 2 +-
 .../classification/hardlabel_classification.py     | 6 ++++--
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/textattack/attack_args.py b/textattack/attack_args.py
index d1f19dfd2..77428c197 100644
--- a/textattack/attack_args.py
+++ b/textattack/attack_args.py
@@ -728,4 +728,4 @@ def _add_parser_args(cls, parser):
         parser = DatasetArgs._add_parser_args(parser)
         parser = _CommandLineAttackArgs._add_parser_args(parser)
         parser = AttackArgs._add_parser_args(parser)
-        return parser
\ No newline at end of file
+        return parser

diff --git a/textattack/goal_functions/classification/hardlabel_classification.py b/textattack/goal_functions/classification/hardlabel_classification.py
index f237bc1e1..60e01e7d2 100644
--- a/textattack/goal_functions/classification/hardlabel_classification.py
+++ b/textattack/goal_functions/classification/hardlabel_classification.py
@@ -9,7 +9,9 @@
 
 class HardLabelClassification(ClassificationGoalFunction):
     """A hard-label attack on classification models which attempts to maximize
-    the semantic similarity of the perturbed text to the original input while keeping the prediction outside the original class's decision boundary.
+    the semantic similarity of the perturbed text to the original input while
+    keeping the prediction outside the original class's decision boundary.
+
     Args:
         target_max_score (float): If set, goal is to reduce model output to
             below this score. Otherwise, goal is to change the overall predicted
@@ -36,4 +38,4 @@ def _get_score(self, model_output, _):
         if (model_output.numel() == 1) and isinstance(self.ground_truth_output, float):
             return max(model_output.item(), self.ground_truth_output)
         else:
-            return 1 - model_output[self.ground_truth_output]
\ No newline at end of file
+            return 1 - model_output[self.ground_truth_output]
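
Usage sketch for the new goal function. This is a minimal example, assuming HardLabelClassification is also exported from textattack/goal_functions/classification/__init__.py (an import these diffs do not show); the model and dataset names below are illustrative, and the transformation, constraints, and search method are picked only for demonstration.

    import transformers

    import textattack
    from textattack import Attack, AttackArgs, Attacker
    from textattack.constraints.pre_transformation import (
        RepeatModification,
        StopwordModification,
    )
    from textattack.goal_functions.classification import HardLabelClassification
    from textattack.search_methods import GreedyWordSwapWIR
    from textattack.transformations import WordSwapEmbedding

    # Wrap a HuggingFace sequence-classification model for TextAttack.
    model = transformers.AutoModelForSequenceClassification.from_pretrained(
        "textattack/bert-base-uncased-imdb"
    )
    tokenizer = transformers.AutoTokenizer.from_pretrained(
        "textattack/bert-base-uncased-imdb"
    )
    model_wrapper = textattack.models.wrappers.HuggingFaceModelWrapper(model, tokenizer)

    # The goal is complete once the predicted class differs from the ground truth
    # (or, if target_max_score is set, once the true class's score drops below it).
    goal_function = HardLabelClassification(model_wrapper, query_budget=500)

    # Assemble a simple attack: embedding-based word swaps, searched greedily
    # by word importance ranking.
    attack = Attack(
        goal_function,
        [RepeatModification(), StopwordModification()],
        WordSwapEmbedding(max_candidates=50),
        GreedyWordSwapWIR(wir_method="delete"),
    )

    dataset = textattack.datasets.HuggingFaceDataset("imdb", split="test")
    Attacker(attack, dataset, AttackArgs(num_examples=5)).attack_dataset()

The registry entry added to attack_args.py should also expose the goal function on the command line, e.g. "textattack attack --model lstm-mr --goal-function hardlabel-classification --num-examples 5", with the transformation, constraints, and search method filled in from the _CommandLineAttackArgs defaults.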