Skip to content

Commit

Permalink
Add some more format string tests
Browse files Browse the repository at this point in the history
  • Loading branch information
eliaskosunen committed Oct 7, 2023
1 parent 4055d0c commit ae9cd02
Show file tree
Hide file tree
Showing 4 changed files with 185 additions and 26 deletions.
97 changes: 71 additions & 26 deletions include/scn/detail/format_string_parser.h
Original file line number Diff line number Diff line change
Expand Up @@ -904,38 +904,82 @@ namespace scn {
std::basic_string_view<CharT> format,
Handler&& handler)
{
if (SCN_LIKELY(format.size() < 32)) {
// Small size -> use a simple loop instead of memchr
auto begin = format.data();
auto it = begin;
const auto end = format.data() + format.size();

while (it != end) {
const auto ch = *it++;
if (ch == CharT{'{'}) {
handler.on_literal_text(begin, it - 1);

begin = it =
parse_replacement_field(it - 1, end, handler);
if (!handler) {
return;
}
// TODO: memchr fast path with a larger (> 32) format string

auto begin = format.data();
auto it = begin;
const auto end = format.data() + format.size();

while (it != end) {
const auto ch = *it++;
if (ch == CharT{'{'}) {
handler.on_literal_text(begin, it - 1);

begin = it = parse_replacement_field(it - 1, end, handler);
if (!handler) {
return;
}
else if (ch == CharT{'}'}) {
if (SCN_UNLIKELY(it == end || *it != CharT{'}'})) {
handler.on_error("Unmatched '}' in format string");
return;
}

handler.on_literal_text(begin, it);
begin = ++it;
}
else if (ch == CharT{'}'}) {
if (SCN_UNLIKELY(it == end || *it != CharT{'}'})) {
handler.on_error("Unmatched '}' in format string");
return;
}

handler.on_literal_text(begin, it);
begin = ++it;
}
}

handler.on_literal_text(begin, end);
return;
handler.on_literal_text(begin, end);
return;

#if 0
auto begin = format.data();
const auto end = format.data() + format.size();

while (begin != end) {
if (end - begin >= 1 && *begin == CharT{'}'} &&
*(begin + 1) == CharT{'}'}) {
handler.on_literal_text(begin, begin + 1);
begin += 2;
}
if (!handler || begin == end) {
break;
}

auto p = find<IsConstexpr>(begin, end, CharT{'{'});
if (p == end) {
return handler.on_literal_text(begin, end);
}

handler.on_literal_text(begin, p);
if (!handler) {
break;
}

++p;
if (p == end) {
break;
}
if (*p == CharT{'{'}) {
handler.on_literal_text(p, p + 1);
++p;
continue;
}
if (!handler) {
break;
}

p = parse_replacement_field(p - 1, end, handler);
if (!handler) {
break;
}
begin = p;
}
#endif

#if 0
const auto reader = [&handler](const CharT* begin,
const CharT* end) {
if (begin == end) {
Expand Down Expand Up @@ -987,6 +1031,7 @@ namespace scn {
return;
}
}
#endif
}

template <bool IsConstexpr, typename CharT, typename Handler>
Expand Down
82 changes: 82 additions & 0 deletions tests/unittests/format_string_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,12 @@ TEST(FormatStringTest, TooManyArgsInArgListLiteral)
}
#endif

// The format string "{{}}{}" is expected to consume the literal "{}" at the
// start of the input and then scan the integer that follows it.
TEST(FormatStringTest, EscapedBraces)
{
    auto scanned = scn::scan<int>("{}123", scn::runtime("{{}}{}"));

    // The scan must succeed before the value may be inspected.
    ASSERT_TRUE(scanned);
    EXPECT_EQ(scanned->value(), 123);
}

TEST(FormatStringTest, TooManyArgsInFormatStringRuntime)
{
auto result = scn::scan<int>("42", scn::runtime("{} {}"));
Expand Down Expand Up @@ -135,6 +141,13 @@ TEST(FormatStringTest, AlphaCharacterSet)
ASSERT_TRUE(result);
EXPECT_EQ(std::get<0>(result->values()), "abc");
}
// Same scenario as the compile-time-checked [:alpha:] test, but with the
// format string passed through scn::runtime: "abc" is expected to be read
// from "abc123".
TEST(FormatStringTest, AlphaCharacterSetRuntime)
{
    auto scanned =
        scn::scan<std::string>("abc123", scn::runtime("{:[:alpha:]}"));

    // The scan must succeed before the value may be inspected.
    ASSERT_TRUE(scanned);
    EXPECT_EQ(std::get<0>(scanned->values()), "abc");
}
TEST(FormatStringTest, AlphaCharacterSetWithStringView)
{
auto result = scn::scan<std::string_view>("abc123", "{:[:alpha:]}");
Expand All @@ -159,6 +172,42 @@ TEST(FormatStringTest, NonTerminatedCharacterSetWithStringView)
EXPECT_FALSE(result);
}

// A character set containing the backslash specifier `\l` (presumably the
// "letters" class, going by the test name) is expected to read "abc" from
// "abc def", i.e. stop at the space.
TEST(FormatStringTest, BackslashLettersCharacterSet)
{
    auto scanned = scn::scan<std::string>("abc def", "{:[\\l]}");

    // The scan must succeed before the value may be inspected.
    ASSERT_TRUE(scanned);
    EXPECT_EQ(scanned->value(), "abc");
}
// Runtime-format-string variant of the `\l` character set test: the same
// input and format are used, with the format wrapped in scn::runtime.
TEST(FormatStringTest, BackslashLettersCharacterSetRuntime)
{
    auto scanned =
        scn::scan<std::string>("abc def", scn::runtime("{:[\\l]}"));

    // The scan must succeed before the value may be inspected.
    ASSERT_TRUE(scanned);
    EXPECT_EQ(scanned->value(), "abc");
}
// The format string ends right after the backslash inside an unterminated
// character set; scanning with it is expected to fail.
TEST(FormatStringTest, NonTerminatedBackslashSpecifier)
{
    auto scanned = scn::scan<std::string>("abc def", scn::runtime("{:[\\"));

    ASSERT_FALSE(scanned);
}

// The character set [a-c] is expected to read "abc" from "abcd", leaving the
// 'd' unread.
TEST(FormatStringTest, RangeSet)
{
    auto scanned = scn::scan<std::string>("abcd", "{:[a-c]}");

    // The scan must succeed before the value may be inspected.
    ASSERT_TRUE(scanned);
    EXPECT_EQ(scanned->value(), "abc");
}
// Runtime-format-string variant of the [a-c] range test: the same input and
// format are used, with the format wrapped in scn::runtime.
TEST(FormatStringTest, RangeSetRuntime)
{
    auto scanned = scn::scan<std::string>("abcd", scn::runtime("{:[a-c]}"));

    // The scan must succeed before the value may be inspected.
    ASSERT_TRUE(scanned);
    EXPECT_EQ(scanned->value(), "abc");
}
// A range whose end precedes its start ([c-a]) is expected to make the scan
// fail.
TEST(FormatStringTest, InvalidRangeSet)
{
    auto scanned = scn::scan<std::string>("abcd", scn::runtime("{:[c-a]}"));

    ASSERT_FALSE(scanned);
}

TEST(FormatStringTest, ExtraArgInFormatString)
{
auto result = scn::scan<std::string>("abc def", scn::runtime("{} {}"));
Expand Down Expand Up @@ -192,3 +241,36 @@ TEST(FormatStringTest, AnyComboOfWhitespaceSkipsAnyWhitespace)
EXPECT_EQ(a, 'a');
EXPECT_EQ(b, 'b');
}

// Format string longer than 32 characters, with the replacement field placed
// after a long run of literal text.
TEST(FormatStringTest, LongFormatString1)
{
    auto scanned = scn::scan<std::string>(
        "abcdefghijklmnopqrstuvwxyz 1 234567890",
        scn::runtime("abcdefghijklmnopqrstuvwxyz {} 23456789"));

    // The scan must succeed before the value may be inspected.
    ASSERT_TRUE(scanned);
    EXPECT_EQ(scanned->value(), "1");
}
// Format string longer than 32 characters, with the replacement field placed
// before a long run of literal text.
TEST(FormatStringTest, LongFormatString2)
{
    auto scanned = scn::scan<std::string>(
        "123456789 0 abcdefghijklmnopqrstuvwxyz",
        scn::runtime("123456789 {} abcdefghijklmnopqrstuvwxyz"));

    // The scan must succeed before the value may be inspected.
    ASSERT_TRUE(scanned);
    EXPECT_EQ(scanned->value(), "0");
}
// Format string longer than 32 characters in which the replacement field is
// surrounded by escaped braces ("{{}}"), all after a long literal prefix.
TEST(FormatStringTest, LongFormatString3)
{
    auto scanned = scn::scan<char>(
        "abcdefghijklmnopqrstuvwxyz {}1{} 234567890",
        scn::runtime("abcdefghijklmnopqrstuvwxyz {{}}{}{{}} 23456789"));

    // The scan must succeed before the value may be inspected.
    ASSERT_TRUE(scanned);
    EXPECT_EQ(scanned->value(), '1');
}
// Format string longer than 32 characters in which the replacement field is
// surrounded by escaped braces ("{{}}"), followed by a long literal suffix.
TEST(FormatStringTest, LongFormatString4)
{
    auto scanned = scn::scan<char>(
        "123456789 {}0{} abcdefghijklmnopqrstuvwxyz",
        scn::runtime("123456789 {{}}{}{{}} abcdefghijklmnopqrstuvwxyz"));

    // The scan must succeed before the value may be inspected.
    ASSERT_TRUE(scanned);
    EXPECT_EQ(scanned->value(), '0');
}
9 changes: 9 additions & 0 deletions tests/unittests/string_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -167,3 +167,12 @@ TEST(StringTest, WonkyInputAndFormatWithTranscoding)
ASSERT_FALSE(result);
EXPECT_EQ(result.error().code(), scn::scan_error::invalid_format_string);
}

// The input contains 0xC3, a UTF-8 lead byte that is not followed by a
// continuation byte. Scanning a word is still expected to succeed, keep the
// stray byte in the value, and stop at the trailing space.
TEST(StringTest, RecoveryFromInvalidEncoding)
{
    const std::string_view source{"a\xc3 "};

    auto scanned = scn::scan<std::string>(source, "{}");

    // The scan must succeed before the value may be inspected.
    ASSERT_TRUE(scanned);
    EXPECT_EQ(scanned->value(), "a\xc3");
    // The unread remainder must begin at the final character (the space).
    EXPECT_EQ(scanned->begin(), source.end() - 1);
}
23 changes: 23 additions & 0 deletions tests/unittests/unicode_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,29 @@ TEST(UnicodeTest, Utf8CpLength)
0);
}

// Checks that both UTF-8 decoders (the checking one and the `_valid` one)
// return the same, correct code point for well-formed sequences of one, two,
// three and four bytes.
TEST(UnicodeTest, Utf8Decode)
{
    // 1-byte sequence (ASCII).
    EXPECT_EQ(scn::detail::decode_utf8_code_point_exhaustive("a"),
              static_cast<char32_t>('a'));
    EXPECT_EQ(scn::detail::decode_utf8_code_point_exhaustive_valid("a"),
              static_cast<char32_t>('a'));

    // 2-byte sequence: U+00E4.
    EXPECT_EQ(scn::detail::decode_utf8_code_point_exhaustive("ä"),
              char32_t{0xe4});
    EXPECT_EQ(scn::detail::decode_utf8_code_point_exhaustive_valid("ä"),
              char32_t{0xe4});

    // 3-byte sequence: U+FFFD REPLACEMENT CHARACTER. It is spelled as explicit
    // UTF-8 bytes (EF BF BD) because a literal U+FFFD is easily dropped or
    // mangled by tooling, which would leave an empty string here that cannot
    // decode to 0xfffd.
    EXPECT_EQ(scn::detail::decode_utf8_code_point_exhaustive("\xEF\xBF\xBD"),
              char32_t{0xfffd});
    EXPECT_EQ(
        scn::detail::decode_utf8_code_point_exhaustive_valid("\xEF\xBF\xBD"),
        char32_t{0xfffd});

    // 4-byte sequence: U+1F600.
    EXPECT_EQ(scn::detail::decode_utf8_code_point_exhaustive("😀"),
              char32_t{0x1f600});
    EXPECT_EQ(scn::detail::decode_utf8_code_point_exhaustive_valid("😀"),
              char32_t{0x1f600});
}

TEST(UnicodeTest, Utf16CpLength)
{
EXPECT_EQ(scn::detail::utf16_code_point_length_by_starting_code_unit(
Expand Down

0 comments on commit ae9cd02

Please sign in to comment.