Skip to content

Commit

Permalink
Fix errors found with fuzzing
Browse files Browse the repository at this point in the history
  • Loading branch information
eliaskosunen committed Sep 29, 2023
1 parent 292b93d commit 77da9d6
Show file tree
Hide file tree
Showing 8 changed files with 86 additions and 49 deletions.
21 changes: 20 additions & 1 deletion include/scn/detail/format_string.h
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,26 @@ namespace scn {
{
}

// No-op form of the literal-text callback: both pointer arguments are
// unnamed and the body is empty, so literal text is accepted without any
// inspection.
constexpr void on_literal_text(const CharT*, const CharT*) const {}
constexpr void on_literal_text(const CharT* begin,
const CharT* end) const
{
while (begin != end) {
const auto len =
utf_code_point_length_by_starting_code_unit(*begin);
if (SCN_UNLIKELY(len == 0 ||
static_cast<size_t>(end - begin) < len)) {
return on_error("Invalid encoding in format string");
}

const auto cp = decode_utf_code_point_exhaustive(
std::basic_string_view<CharT>{begin, len});
if (SCN_UNLIKELY(cp >= invalid_code_point)) {
return on_error("Invalid encoding in format string");
}

begin += len;
}
}

constexpr auto on_arg_id()
{
Expand Down
43 changes: 11 additions & 32 deletions include/scn/detail/format_string_parser.h
Original file line number Diff line number Diff line change
Expand Up @@ -639,40 +639,19 @@ namespace scn {
auto len = utf_code_point_length_by_starting_code_unit(*begin);
if (SCN_UNLIKELY(len == 0 ||
static_cast<size_t>(end - begin) < len)) {
handler.on_error(
"Invalid Unicode code point in format string argument");
handler.on_error("Invalid encoding in format string");
return invalid_code_point;
}

auto cp_begin = begin;
begin += len;

if constexpr (sizeof(CharT) == 1) {
// UTF-8
auto cp = decode_utf8_code_point_exhaustive(
std::string_view{&*cp_begin, len});
if (SCN_UNLIKELY(cp == invalid_code_point)) {
handler.on_error(
"Invalid Unicode code point in format string argument");
return invalid_code_point;
}
return cp;
}
else if constexpr (sizeof(CharT) == 2) {
// UTF-16
auto cp = decode_utf16_code_point_exhaustive(
std::wstring_view{&*cp_begin, len});
if (SCN_UNLIKELY(cp == invalid_code_point)) {
handler.on_error(
"Invalid Unicode code point in format string argument");
return invalid_code_point;
}
return cp;
}
else {
SCN_EXPECT(len == 1);
return static_cast<char32_t>(*cp_begin);
const auto cp = decode_utf_code_point_exhaustive(
std::basic_string_view<CharT>{begin, len});
if (SCN_UNLIKELY(cp >= invalid_code_point)) {
handler.on_error("Invalid encoding in format string");
return invalid_code_point;
}

begin += len;
return cp;
}

template <typename CharT, typename SpecHandler>
Expand All @@ -685,7 +664,7 @@ namespace scn {

auto cp_first =
parse_presentation_set_code_point(begin, end, handler);
if (SCN_UNLIKELY(cp_first == invalid_code_point)) {
if (SCN_UNLIKELY(cp_first >= invalid_code_point)) {
return;
}

Expand All @@ -695,7 +674,7 @@ namespace scn {

auto cp_second =
parse_presentation_set_code_point(begin, end, handler);
if (SCN_UNLIKELY(cp_second == invalid_code_point)) {
if (SCN_UNLIKELY(cp_second >= invalid_code_point)) {
return;
}

Expand Down
14 changes: 14 additions & 0 deletions include/scn/detail/unicode.h
Original file line number Diff line number Diff line change
Expand Up @@ -245,6 +245,20 @@ namespace scn {
SCN_UNREACHABLE;
}
}

// Decodes the single code point held in `input`, choosing the decoder by
// the size of the code unit type:
//   - 1 byte:  forwards to decode_utf8_code_point_exhaustive
//   - 2 bytes: forwards to decode_utf16_code_point_exhaustive
//   - wider:   `input` is expected to hold exactly one code unit, which is
//              returned cast to char32_t with no further checks here.
template <typename CharT>
inline constexpr char32_t decode_utf_code_point_exhaustive(
    std::basic_string_view<CharT> input)
{
    constexpr auto unit_width = sizeof(CharT);

    if constexpr (unit_width == 1) {
        return decode_utf8_code_point_exhaustive(input);
    }
    else if constexpr (unit_width == 2) {
        return decode_utf16_code_point_exhaustive(input);
    }
    else {
        SCN_EXPECT(input.size() == 1);
        return static_cast<char32_t>(input[0]);
    }
}
} // namespace detail

SCN_END_NAMESPACE
Expand Down
20 changes: 12 additions & 8 deletions src/scn/impl/algorithms/find_whitespace.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,29 +28,33 @@ namespace scn {
std::string_view::iterator find_classic_space_simple_impl(
std::string_view source)
{
for (auto it = source.begin(); it != source.end(); ++it) {
auto it = source.begin();
while (it != source.end()) {
auto ret = is_first_char_space(
detail::make_string_view_from_iterators<char>(
it, source.end()));
if (ret.value) {
return it;
if (ret.is_space) {
break;
}
it = ret.iterator;
}
return source.end();
return it;
}

std::string_view::iterator find_classic_nonspace_simple_impl(
std::string_view source)
{
for (auto it = source.begin(); it != source.end(); ++it) {
auto it = source.begin();
while (it != source.end()) {
auto ret = is_first_char_space(
detail::make_string_view_from_iterators<char>(
it, source.end()));
if (!ret.value) {
return it;
if (!ret.is_space) {
break;
}
it = ret.iterator;
}
return source.end();
return it;
}

bool is_decimal_digit(char ch) SCN_NOEXCEPT
Expand Down
3 changes: 2 additions & 1 deletion src/scn/impl/unicode/unicode.h
Original file line number Diff line number Diff line change
Expand Up @@ -452,7 +452,7 @@ namespace scn {
detail::make_string_view_from_iterators<SourceCharT>(
it, source.end()));

if (SCN_UNLIKELY(cp == detail::invalid_code_point)) {
if (SCN_UNLIKELY(cp >= detail::invalid_code_point)) {
cp = 0xfffd; // Replacement character
}

Expand All @@ -465,6 +465,7 @@ namespace scn {
SCN_EXPECT(ret == 1);

dest.append(temp.data());
it = iter;
}
}

Expand Down
13 changes: 9 additions & 4 deletions src/scn/impl/unicode/unicode_whitespace.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,16 +36,21 @@ namespace scn {
cp == 0x2029; // PARAGRAPH SEPARATOR
}

// Aggregate returned by is_first_char_space().
//
// Member order matters: the struct is brace-initialized positionally and
// is also unpacked with a three-name structured binding by callers.
template <typename CharT>
struct is_first_char_space_result {
// Iterator reported by get_next_code_point() for the inspected view;
// callers assign it to their cursor to move past the code point
// (presumably one past its last code unit — confirm against
// get_next_code_point).
ranges::iterator_t<std::basic_string_view<CharT>> iterator;
// Code point value reported by get_next_code_point().
char32_t cp;
// Result of is_cp_space(cp).
bool is_space;
};

template <typename CharT>
inline auto is_first_char_space(std::basic_string_view<CharT> str)
-> iterator_value_result<
ranges::iterator_t<std::basic_string_view<CharT>>,
bool>
-> is_first_char_space_result<CharT>
{
// TODO: optimize
SCN_EXPECT(!str.empty());
auto res = get_next_code_point(str);
return {res.iterator, is_cp_space(res.value)};
return {res.iterator, res.value, is_cp_space(res.value)};
}
} // namespace impl

Expand Down
11 changes: 8 additions & 3 deletions src/scn/vscan_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -131,10 +131,15 @@ namespace scn {
return on_error("Unexpected end of source");
}

if (auto [after_space_it, is_space] =
if (auto [after_space_it, cp, is_space] =
impl::is_first_char_space(
std::basic_string_view<CharT>{begin, end});
is_space) {
std::basic_string_view<CharT>{
begin, static_cast<size_t>(
ranges::distance(begin, end))});
cp == detail::invalid_code_point) {
return on_error("Invalid encoding in format string");
}
else if (is_space) {
ctx.advance_to(
impl::read_while_classic_space(ctx.range()));
return;
Expand Down
10 changes: 10 additions & 0 deletions tests/unittests/string_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -157,3 +157,13 @@ TEST(StringTest, WonkyInput)
EXPECT_TRUE(result->range().empty());
EXPECT_EQ(result->value(), input);
}

// Scans into a std::wstring from a narrow byte sequence that is used as
// both the source and the format string. The bytes include the non-ASCII
// values 0xdf and 0xb1, and the scan must fail with invalid_format_string.
TEST(StringTest, WonkyInputAndFormatWithTranscoding)
{
    // Raw bytes with no terminating NUL; the view below takes the length
    // from sizeof.
    const char bytes[] = {'a', ']', 'c', '{', '}', '\xdf', ':', '\xb1'};
    std::string_view wonky(bytes, sizeof(bytes));

    // Same view passed in both positions.
    auto scanned = scn::scan<std::wstring>(wonky, wonky);

    ASSERT_FALSE(scanned);
    EXPECT_EQ(scanned.error().code(),
              scn::scan_error::invalid_format_string);
}

0 comments on commit 77da9d6

Please sign in to comment.