Skip to content

Commit

Permalink
Merge pull request #43 from pgolo/dev
Browse files Browse the repository at this point in the history
Version 1.3.3
  • Loading branch information
pgolo authored Sep 17, 2021
2 parents 591a57f + cae7fdd commit de5f660
Show file tree
Hide file tree
Showing 14 changed files with 49 additions and 2 deletions.
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## sic 1.3

### [1.3.3] - 2021-09-17

#### Changed

- Fixed a bug that caused incorrect normalization when a replacement token could itself be tokenized

### [1.3.2] - 2021-01-21

#### Changed
Expand Down
Binary file removed dist/sic-1.3.2-cp36-cp36m-win_amd64.whl
Binary file not shown.
Binary file removed dist/sic-1.3.2-cp37-cp37m-win_amd64.whl
Binary file not shown.
Binary file removed dist/sic-1.3.2-cp38-cp38-win_amd64.whl
Binary file not shown.
Binary file removed dist/sic-1.3.2-cp39-cp39-win_amd64.whl
Binary file not shown.
Binary file removed dist/sic-1.3.2.tar.gz
Binary file not shown.
Binary file added dist/sic-1.3.3-cp36-cp36m-win_amd64.whl
Binary file not shown.
Binary file added dist/sic-1.3.3-cp37-cp37m-win_amd64.whl
Binary file not shown.
Binary file added dist/sic-1.3.3-cp38-cp38-win_amd64.whl
Binary file not shown.
Binary file added dist/sic-1.3.3-cp39-cp39-win_amd64.whl
Binary file not shown.
Binary file added dist/sic-1.3.3.tar.gz
Binary file not shown.
2 changes: 1 addition & 1 deletion shipping/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

setup(
name='sic',
version='1.3.2',
version='1.3.3',
description='Utility for string normalization',
long_description=long_description,
long_description_content_type='text/markdown',
Expand Down
2 changes: 1 addition & 1 deletion sic/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -382,7 +382,7 @@ def normalize(self, source_string, word_separator=' ', normalizer_option=0, cont
on_the_right = False
added_separator = False
if character not in (word_separator, control_character) and last_character not in (word_separator, control_character):
if this_group == 0 or this_group != last_group:
if (this_group == 0 or this_group != last_group) and (subtrie is self.content or character not in subtrie):
if not buffer.endswith(word_separator) and not buffer.endswith(control_character):
buffer += control_character
if len(b_map) == len(buffer):
Expand Down
41 changes: 41 additions & 0 deletions test/ut_sic.py
Original file line number Diff line number Diff line change
Expand Up @@ -946,6 +946,47 @@ def test_spelling_correction_implicit(self):
assert expected1 == normalized1, 'Expected "%s", got "%s".' % (expected1, normalized1)
assert expected2 == normalized2, 'Expected "%s", got "%s".' % (expected2, normalized2)

def test_split_inside_replace(self):
    """Check normalization when a replacement token contains splittable text.

    The model replaces '(p)' with '[p]' and also registers split rules for
    '(a)' and 'p'. Every case below expects the '[p]' produced by the
    replacement to stay in one piece, while a bare 'p' elsewhere in the
    string is still separated. Upper-case inputs are expected to yield the
    same lower-case output as their lower-case counterparts.
    """
    model = sic.Model()
    # NOTE(review): the second SplitToken argument ('l', 'lmr') presumably
    # selects where the token is split off -- confirm against the sic docs.
    rule_specs = (
        (sic.ReplaceToken, ('(p)', '[p]')),
        (sic.SplitToken, ('(a)', 'l')),
        (sic.SplitToken, ('p', 'lmr')),
    )
    for rule_type, rule_args in rule_specs:
        model.add_rule(rule_type(*rule_args))
    sic.build_normalizer(model)

    # (input string, expected normalized string)
    cases = [
        ('ab(c)', 'ab ( c )'),
        ('ab(p)', 'ab [p]'),
        ('(a)ab ( p )', '(a) ab ( p )'),
        ('ab(p)cd', 'ab [p] cd'),
        ('ab(p)(p)', 'ab [p] [p]'),
        ('(p)ab(p)(p)', '[p] ab [p] [p]'),
        ('(p)(p)ab(p)', '[p] [p] ab [p]'),
        ('ab(p)(p)cd', 'ab [p] [p] cd'),
        ('AB(C)', 'ab ( c )'),
        ('AB(P)', 'ab [p]'),
        ('(A)AB ( P )', '(a) ab ( p )'),
        ('AB(P)CD', 'ab [p] cd'),
        ('AB(P)(P)', 'ab [p] [p]'),
        ('(P)AB(P)(P)', '[p] ab [p] [p]'),
        ('(P)(P)AB(P)', '[p] [p] ab [p]'),
        ('AB(P)(P)CD', 'ab [p] [p] cd'),
        ('AB(P)(P)CDPP', 'ab [p] [p] cd p p'),
    ]

    # Normalize every input first, then verify in order, so that an error
    # raised by any normalize() call surfaces before the first assertion.
    outcomes = [sic.normalize(source) for source, _ in cases]
    for (_, expected), normalized in zip(cases, outcomes):
        assert expected == normalized, 'Expected "%s", got "%s".' % (expected, normalized)

if __name__ == '__main__':
sys.path.insert(0, '')
import sic # pylint: disable=E0611,F0401
Expand Down

0 comments on commit de5f660

Please sign in to comment.