From 65beffa6040cb5de14aaed25d833284a9f430acf Mon Sep 17 00:00:00 2001 From: Elias Kosunen Date: Tue, 15 Oct 2024 23:30:47 +0300 Subject: [PATCH] Add invalid_literal, invalid_fill, and length_too_short --- include/scn/scan.h | 24 ++++++++++--- src/scn/impl.cpp | 30 ++++++++++------ src/scn/impl.h | 36 +++++++++---------- tests/unittests/align_and_fill_test.cpp | 46 +++++++++++++++++++++---- 4 files changed, 97 insertions(+), 39 deletions(-) diff --git a/include/scn/scan.h b/include/scn/scan.h index a565aa71..e0a1536b 100644 --- a/include/scn/scan.h +++ b/include/scn/scan.h @@ -3655,15 +3655,28 @@ class SCN_TRIVIAL_ABI scan_error { public: /// Error code enum code { - /// EOF + /// Input ended unexpectedly. end_of_input, - /// Format string was invalid + /// Format string was invalid. + /// Often a compile-time error, if supported or enabled. invalid_format_string, - /// Scanned value was invalid for given type. + /// Scanned value was invalid for given type, + /// or a value of the given type couldn't be scanned. invalid_scanned_value, + /// Literal character specified in format string not found in source. + invalid_literal, + + /// Too many fill characters scanned, + /// field precision (max width) exceeded. + invalid_fill, + + /// Scanned field width was shorter than + /// what was specified as the minimum field width. + length_too_short, + /// Source range is in an invalid state, /// failed to continue reading. 
invalid_source_state, @@ -3719,6 +3732,9 @@ class SCN_TRIVIAL_ABI scan_error { case end_of_input: case invalid_format_string: case invalid_scanned_value: + case invalid_literal: + case invalid_fill: + case length_too_short: return std::errc::invalid_argument; case invalid_source_state: return std::errc::io_error; @@ -9378,7 +9394,7 @@ SCN_NODISCARD auto input(scan_format_string format) std::tuple{}); auto err = vinput(format, make_scan_args(result->values())); if (SCN_UNLIKELY(!err)) { - result = unexpected(err); + result = unexpected(err.error()); } return result; } diff --git a/src/scn/impl.cpp b/src/scn/impl.cpp index 5f148c9c..8f5d681a 100644 --- a/src/scn/impl.cpp +++ b/src/scn/impl.cpp @@ -1773,7 +1773,8 @@ struct format_handler_base { : (1ull << args_count_lower64) - 1; if (visited_args_lower64 != mask) { - return on_error("Argument list not exhausted"); + return on_error({scan_error::invalid_format_string, + "Argument list not exhausted"}); } } @@ -1785,14 +1786,16 @@ struct format_handler_base { for (auto it = visited_args_upper.begin(); it != visited_args_upper.end() - 1; ++it) { if (*it != std::numeric_limits::max()) { - return on_error("Argument list not exhausted"); + return on_error({scan_error::invalid_format_string, + "Argument list not exhausted"}); } last_args_count -= 8; } const auto mask = static_cast(1u << last_args_count) - 1; if (visited_args_upper.back() != mask) { - return on_error("Argument list not exhausted"); + return on_error({scan_error::invalid_format_string, + "Argument list not exhausted"}); } } @@ -1814,7 +1817,8 @@ struct format_handler_base { SCN_NODISCARD bool has_arg_been_visited(size_t id) { if (SCN_UNLIKELY(id >= args_count)) { - on_error("Invalid out-of-range argument ID"); + on_error({scan_error::invalid_format_string, + "Argument ID out-of-range"}); return false; } @@ -1829,12 +1833,14 @@ struct format_handler_base { void set_arg_as_visited(size_t id) { if (SCN_UNLIKELY(id >= args_count)) { - on_error("Invalid 
out-of-range argument ID"); + on_error({scan_error::invalid_format_string, + "Argument ID out-of-range"}); return; } if (SCN_UNLIKELY(has_arg_been_visited(id))) { - return on_error("Argument with this ID has already been scanned"); + on_error({scan_error::invalid_format_string, + "Argument with this ID has already been scanned"}); } if (SCN_LIKELY(id < 64)) { @@ -1944,14 +1950,16 @@ struct format_handler : format_handler_base { auto it = get_ctx().begin(); if (impl::is_range_eof(it, get_ctx().end())) { SCN_UNLIKELY_ATTR - return on_error("Unexpected end of source"); + return on_error( + {scan_error::invalid_literal, "Unexpected end of source"}); } if (auto [after_space_it, cp, is_space] = impl::is_first_char_space( detail::make_string_view_from_pointers(begin, end)); cp == detail::invalid_code_point) { SCN_UNLIKELY_ATTR - return on_error("Invalid encoding in format string"); + return on_error({scan_error::invalid_format_string, + "Invalid encoding in format string"}); } else if (is_space) { // Skip all whitespace in input @@ -1969,7 +1977,8 @@ struct format_handler : format_handler_base { if (*it != *begin) { SCN_UNLIKELY_ATTR - return on_error("Unexpected literal character in source"); + return on_error({scan_error::invalid_literal, + "Unexpected literal character in source"}); } get_ctx().advance_to(ranges::next(it)); } @@ -2037,7 +2046,8 @@ struct format_handler : format_handler_base { begin = detail::parse_format_specs(begin, end, handler); if (begin == end || *begin != char_type{'}'}) { SCN_UNLIKELY_ATTR - on_error("Missing '}' in format string"); + on_error({scan_error::invalid_format_string, + "Missing '}' in format string"}); return parse_ctx.begin(); } if (SCN_UNLIKELY(!handler.get_error())) { diff --git a/src/scn/impl.h b/src/scn/impl.h index 475bc15b..839e1634 100644 --- a/src/scn/impl.h +++ b/src/scn/impl.h @@ -4594,19 +4594,19 @@ struct regex_matches_reader { if constexpr (!std::is_same_v) { return unexpected_scan_error( - 
scan_error::invalid_scanned_value, + scan_error::invalid_format_string, "Cannot transcode is regex_matches_reader"); } else if constexpr (!SCN_REGEX_SUPPORTS_WIDE_STRINGS && !std::is_same_v) { return unexpected_scan_error( - scan_error::invalid_scanned_value, + scan_error::invalid_format_string, "Regex backend doesn't support wide strings as input"); } else { if (!is_entire_source_contiguous(range)) { return unexpected_scan_error( - scan_error::invalid_scanned_value, + scan_error::invalid_format_string, "Cannot use regex with a non-contiguous source " "range"); } @@ -4692,12 +4692,12 @@ auto read_string_view_impl(Range range, if (src.stores_allocated_string()) { return unexpected_scan_error( - scan_error::invalid_scanned_value, + scan_error::invalid_format_string, "Cannot read a string_view from this source range (not " "contiguous)"); } if constexpr (!std::is_same_v) { - return unexpected_scan_error(scan_error::invalid_scanned_value, + return unexpected_scan_error(scan_error::invalid_format_string, "Cannot read a string_view from " "this source range (would require " "transcoding)"); @@ -4819,13 +4819,13 @@ class regex_string_reader_impl { if constexpr (!SCN_REGEX_SUPPORTS_WIDE_STRINGS && !std::is_same_v) { return unexpected_scan_error( - scan_error::invalid_scanned_value, + scan_error::invalid_format_string, "Regex backend doesn't support wide strings as input"); } else { if (!is_entire_source_contiguous(range)) { return unexpected_scan_error( - scan_error::invalid_scanned_value, + scan_error::invalid_format_string, "Cannot use regex with a non-contiguous source " "range"); } @@ -4885,8 +4885,8 @@ class character_reader_impl { -> scan_expected> { return unexpected_scan_error( - scan_error::invalid_scanned_value, - "character_reader requires take_width_view"); + scan_error::invalid_format_string, + "Cannot read characters {:c} without maximum field width"); } }; @@ -5382,8 +5382,9 @@ struct bool_reader : public bool_reader_base { return *r; } - return 
unexpected_scan_error(scan_error::invalid_scanned_value, - "read_textual: No match"); + return unexpected_scan_error( + scan_error::invalid_scanned_value, + "Failed to read textual boolean: No match"); } }; @@ -5910,18 +5911,15 @@ SCN_MAYBE_UNUSED constexpr scan_expected check_widths_for_arg_reader( if (specs.width != 0) { if (prefix_width + value_width + postfix_width < specs.width) { return unexpected_scan_error( - scan_error::invalid_scanned_value, + scan_error::length_too_short, "Scanned value too narrow, width did not exceed what " "was specified in the format string"); } } if (specs.precision != 0) { - if (prefix_width + value_width + postfix_width > specs.precision) { - return unexpected_scan_error( - scan_error::invalid_scanned_value, - "Scanned value too wide, width exceeded the specified " - "precision"); - } + // Ensured by take_width_view + SCN_ENSURE(prefix_width + value_width + postfix_width <= + specs.precision); } return {}; } @@ -6049,7 +6047,7 @@ struct arg_reader { if (specs.precision != 0) { if (specs.precision <= prefix_width) { return unexpected_scan_error( - scan_error::invalid_scanned_value, + scan_error::invalid_fill, "Too many fill characters before value, " "precision exceeded before reading value"); } diff --git a/tests/unittests/align_and_fill_test.cpp b/tests/unittests/align_and_fill_test.cpp index 1ea2259b..e9e47bfe 100644 --- a/tests/unittests/align_and_fill_test.cpp +++ b/tests/unittests/align_and_fill_test.cpp @@ -330,7 +330,7 @@ TEST(AlignAndFillTest, P1729_Ex3r11) { auto r = scn::scan("42", "{:*>5}"); ASSERT_FALSE(r); - EXPECT_EQ(r.error().code(), scn::scan_error::invalid_scanned_value); + EXPECT_EQ(r.error().code(), scn::scan_error::length_too_short); } TEST(AlignAndFillTest, P1729_Ex3r12) { @@ -343,7 +343,7 @@ TEST(AlignAndFillTest, P1729_Ex3r13) { auto r = scn::scan("42", "{:*>5.5}"); ASSERT_FALSE(r); - EXPECT_EQ(r.error().code(), scn::scan_error::invalid_scanned_value); + EXPECT_EQ(r.error().code(), 
scn::scan_error::length_too_short); } TEST(AlignAndFillTest, P1729_Ex3r14) @@ -393,7 +393,7 @@ TEST(AlignAndFillTest, P1729_Ex3r20) { auto r = scn::scan("42", "{:*<5}"); ASSERT_FALSE(r); - EXPECT_EQ(r.error().code(), scn::scan_error::invalid_scanned_value); + EXPECT_EQ(r.error().code(), scn::scan_error::length_too_short); } TEST(AlignAndFillTest, P1729_Ex3r21) { @@ -406,7 +406,7 @@ TEST(AlignAndFillTest, P1729_Ex3r22) { auto r = scn::scan("42", "{:*<5.5}"); ASSERT_FALSE(r); - EXPECT_EQ(r.error().code(), scn::scan_error::invalid_scanned_value); + EXPECT_EQ(r.error().code(), scn::scan_error::length_too_short); } TEST(AlignAndFillTest, P1729_Ex3r23) @@ -463,7 +463,7 @@ TEST(AlignAndFillTest, P1729_Ex3r30) { auto r = scn::scan("**42*", "{:*^6}"); ASSERT_FALSE(r); - EXPECT_EQ(r.error().code(), scn::scan_error::invalid_scanned_value); + EXPECT_EQ(r.error().code(), scn::scan_error::length_too_short); } TEST(AlignAndFillTest, P1729_Ex3r31) { @@ -476,9 +476,43 @@ TEST(AlignAndFillTest, P1729_Ex3r32) { auto r = scn::scan("**42*", "{:*^6.6}"); ASSERT_FALSE(r); + EXPECT_EQ(r.error().code(), scn::scan_error::length_too_short); +} + +TEST(AlignAndFillTest, P1729_Ex3r33) +{ + auto r = scn::scan("#*42*", "{:*^}"); + ASSERT_FALSE(r); + EXPECT_EQ(r.error().code(), scn::scan_error::invalid_scanned_value); +} +TEST(AlignAndFillTest, P1729_Ex3r34) +{ + auto r = scn::scan("#*42*", "#{:*^}"); + ASSERT_TRUE(r); + EXPECT_EQ(r->value(), 42); + EXPECT_STREQ(r->begin(), ""); +} +TEST(AlignAndFillTest, P1729_Ex3r35) +{ + auto r = scn::scan("#*42*", "#{:#^}"); + ASSERT_FALSE(r); EXPECT_EQ(r.error().code(), scn::scan_error::invalid_scanned_value); } +TEST(AlignAndFillTest, P1729_Ex3r36) +{ + auto r = scn::scan("***42*", "{:*^3}"); + ASSERT_TRUE(r); + EXPECT_EQ(r->value(), 42); + EXPECT_STREQ(r->begin(), ""); +} +TEST(AlignAndFillTest, P1729_Ex3r37) +{ + auto r = scn::scan("***42*", "{:*^.3}"); + ASSERT_FALSE(r); + EXPECT_EQ(r.error().code(), scn::scan_error::invalid_fill); +} + 
TEST(AlignAndFillTest, PythonParse1) { auto r = scn::scan("with a herring", "with {:>} herring"); @@ -529,7 +563,7 @@ TEST(AlignAndFillTest, PythonParse4) { auto r = scn::scan("look at that", "{:4}{:4}"); ASSERT_FALSE(r); - EXPECT_EQ(r.error().code(), scn::scan_error::invalid_scanned_value); + EXPECT_EQ(r.error().code(), scn::scan_error::length_too_short); } TEST(AlignAndFillTest, PythonParse5) {