From 82fdd42abbccbbba212d7435ba8550b4a580b5d9 Mon Sep 17 00:00:00 2001 From: liuyuyan2717 <2717108404@qq.com> Date: Wed, 20 Mar 2024 14:06:09 +0800 Subject: [PATCH 1/2] Use cache to achieve faster generation of Chinese HowNet replacement words --- .../chn_transformations/chinese_word_swap_hownet.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/textattack/transformations/word_swaps/chn_transformations/chinese_word_swap_hownet.py b/textattack/transformations/word_swaps/chn_transformations/chinese_word_swap_hownet.py index 73ba26db..9beb8b51 100644 --- a/textattack/transformations/word_swaps/chn_transformations/chinese_word_swap_hownet.py +++ b/textattack/transformations/word_swaps/chn_transformations/chinese_word_swap_hownet.py @@ -15,16 +15,20 @@ class ChineseWordSwapHowNet(WordSwap): def __init__(self, topk=5): self.hownet_dict = OpenHowNet.HowNetDict(init_sim=True) self.topk = topk + self.wordCache = {} def _get_replacement_words(self, word): """Returns a list containing all possible words with N characters replaced by a homoglyph.""" + if word in self.wordCache: # use cache + return self.wordCache[word] results = self.hownet_dict.get_nearest_words(word, language="zh", K=self.topk) synonyms = [] if results: for key, value in results.items(): - for w in value: - synonyms.append(w) + synonyms = synonyms + value[1:] + self.wordCache[word] = synonyms.copy() + break return synonyms else: return [] From c158ab30cb5e254224673ac48e93a08c47d8eaae Mon Sep 17 00:00:00 2001 From: liuyuyan2717 <2717108404@qq.com> Date: Sun, 31 Mar 2024 01:21:51 +0800 Subject: [PATCH 2/2] formatting code --- .../word_swaps/chn_transformations/chinese_word_swap_hownet.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/textattack/transformations/word_swaps/chn_transformations/chinese_word_swap_hownet.py b/textattack/transformations/word_swaps/chn_transformations/chinese_word_swap_hownet.py index 9beb8b51..53f5d71e 100644 --- a/textattack/transformations/word_swaps/chn_transformations/chinese_word_swap_hownet.py +++ b/textattack/transformations/word_swaps/chn_transformations/chinese_word_swap_hownet.py @@ -20,7 +20,7 @@ def __init__(self, topk=5): def _get_replacement_words(self, word): """Returns a list containing all possible words with N characters replaced by a homoglyph.""" - if word in self.wordCache: # use cache + if word in self.wordCache: return self.wordCache[word] results = self.hownet_dict.get_nearest_words(word, language="zh", K=self.topk) synonyms = []