-
Notifications
You must be signed in to change notification settings - Fork 110
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
vmime: prevent loss of a space during text::createFromString (#306)
```
mailbox(text("Test München West", charsets::UTF_8), "[email protected]").generate();
```

produces

```
=?us-ascii?Q?Test_?= =?utf-8?Q?M=C3=BCnchen?= =?us-ascii?Q?West?= <[email protected]>
```

The first space, between ``Test`` and ``München``, is encoded as an underscore along with the first word: ``Test_``. The second space, between ``München`` and ``West``, is encoded with neither of the two words and is thus lost. Decoding the text results in ``Test MünchenWest`` instead of ``Test München West``.

This is caused by how ``vmime::text::createFromString()`` handles transitions between 7-bit and 8-bit words: if an 8-bit word follows a 7-bit word, a space is appended to the previous word. The opposite case, a 7-bit word following an 8-bit word, *lacks* this behaviour.

When one fixes this problem, a follow-up issue appears: ``text::createFromString("a b\xFFc d")`` tokenizes the input into ``m_words={word("a "), word("b\xFFc ", utf8), word("d")}``. This "right-side alignment" of the whitespace is a problem for ``word::generate()``: as per RFC 2047, spaces between adjacent encoded words are just separators and are not meant to be displayed, whereas a space between an encoded word and regular ASCII text is not just a separator but is also meant to be displayed. When ``word::generate()`` outputs the ``b`` word, it would have to strip one space, but only when there is a transition from an encoded word to an unencoded word. ``word::generate()`` does not know whether ``d`` will be encoded or unencoded.

The idea now is to change the tokenization of ``text::createFromString`` such that whitespace is at the *start* of words rather than at the end. With that, ``word::generate()`` need not know anything about the next word, but only about the *previous* one.

Thus, in this patch:

1. The tokenization of ``text::createFromString`` is changed to left-align spaces, and the function is fixed to account for the missing space on transition.
2. ``word::generate`` learns how to steal a space character.
3. Testcases are adjusted to account for the shifted position of the space.

Fixes: #283, #284

Co-authored-by: Vincent Richard <[email protected]>
- Loading branch information
1 parent
c105165
commit d296c2d
Showing 4 changed files with 48 additions and 66 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -32,6 +32,7 @@ VMIME_TEST_SUITE_BEGIN(mailboxTest) | |
VMIME_TEST(testSeparatorInComment) | ||
VMIME_TEST(testMalformations) | ||
VMIME_TEST(testExcessiveQuoting) | ||
VMIME_TEST(testSpacing) | ||
VMIME_TEST_LIST_END | ||
|
||
|
||
|
@@ -184,4 +185,13 @@ VMIME_TEST_SUITE_BEGIN(mailboxTest) | |
VASSERT_EQ("generate", "=?utf-8?Q?Foo_B=40r?= <[email protected]>", a->generate()); | ||
} | ||
|
||
void testSpacing() { | ||
|
||
vmime::text t("Foo B\xc3\xa4renstark Baz", vmime::charsets::UTF_8); | ||
vmime::mailbox m(t, "[email protected]"); | ||
VASSERT_EQ("1", "Foo =?utf-8?Q?B=C3=A4renstark?= Baz", t.generate()); | ||
VASSERT_EQ("2", "=?us-ascii?Q?Foo?= =?utf-8?Q?_B=C3=A4renstark?= =?us-ascii?Q?_Baz?= <[email protected]>", m.generate()); | ||
|
||
} | ||
|
||
VMIME_TEST_SUITE_END |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -189,11 +189,11 @@ VMIME_TEST_SUITE_BEGIN(textTest) | |
t2.createFromString(s2, c2); | ||
|
||
VASSERT_EQ("2.1", 3, t2.getWordCount()); | ||
VASSERT_EQ("2.2", "some ASCII characters and special chars: ", t2.getWordAt(0)->getBuffer()); | ||
VASSERT_EQ("2.2", "some ASCII characters and special chars:", t2.getWordAt(0)->getBuffer()); | ||
VASSERT_EQ("2.3", vmime::charset(vmime::charsets::US_ASCII), t2.getWordAt(0)->getCharset()); | ||
VASSERT_EQ("2.4", "\xc3\xa4\xd0\xb0", t2.getWordAt(1)->getBuffer()); | ||
VASSERT_EQ("2.5", c2, t2.getWordAt(1)->getCharset()); | ||
VASSERT_EQ("2.6", "and then more ASCII chars.", t2.getWordAt(2)->getBuffer()); | ||
VASSERT_EQ("2.6", " and then more ASCII chars.", t2.getWordAt(2)->getBuffer()); | ||
VASSERT_EQ("2.7", vmime::charset(vmime::charsets::US_ASCII), t2.getWordAt(2)->getCharset()); | ||
} | ||
|
||
|
@@ -512,9 +512,9 @@ VMIME_TEST_SUITE_BEGIN(textTest) | |
text.createFromString("Achim Br\xc3\xa4ndt", vmime::charsets::UTF_8); | ||
|
||
VASSERT_EQ("1", 2, text.getWordCount()); | ||
VASSERT_EQ("2", "Achim ", text.getWordAt(0)->getBuffer()); | ||
VASSERT_EQ("2", "Achim", text.getWordAt(0)->getBuffer()); | ||
VASSERT_EQ("3", "us-ascii", text.getWordAt(0)->getCharset()); | ||
VASSERT_EQ("4", "Br\xc3\xa4ndt", text.getWordAt(1)->getBuffer()); | ||
VASSERT_EQ("4", " Br\xc3\xa4ndt", text.getWordAt(1)->getBuffer()); | ||
VASSERT_EQ("5", "utf-8", text.getWordAt(1)->getCharset()); | ||
|
||
// Generate | ||
|
@@ -534,7 +534,7 @@ VMIME_TEST_SUITE_BEGIN(textTest) | |
|
||
// Space MUST be encoded inside a word | ||
vmime::mailbox mbox(vmime::text("Achim Br\xc3\xa4ndt", vmime::charsets::UTF_8), "[email protected]"); | ||
VASSERT_EQ("generate1", "=?us-ascii?Q?Achim_?= =?utf-8?Q?Br=C3=A4ndt?= <[email protected]>", mbox.generate()); | ||
VASSERT_EQ("generate1", "=?us-ascii?Q?Achim?= =?utf-8?Q?_Br=C3=A4ndt?= <[email protected]>", mbox.generate()); | ||
|
||
vmime::text txt; | ||
txt.appendWord(vmime::make_shared <vmime::word>("Achim ", "us-ascii")); | ||
|