Skip to content

Commit

Permalink
Datetime scanning
Browse files Browse the repository at this point in the history
  • Loading branch information
eliaskosunen committed Sep 10, 2024
1 parent f3b00ec commit 8cabfa8
Show file tree
Hide file tree
Showing 16 changed files with 3,369 additions and 107 deletions.
2,785 changes: 2,785 additions & 0 deletions include/scn/chrono.h

Large diffs are not rendered by default.

30 changes: 20 additions & 10 deletions include/scn/scan.h
Original file line number Diff line number Diff line change
Expand Up @@ -3218,19 +3218,15 @@ class view_interface {
typename = std::enable_if_t<contiguous_iterator<iterator_t<R>>>>
constexpr auto data()
{
return ranges::empty(derived())
? nullptr
: std::addressof(*ranges::begin(derived()));
return detail::to_address(ranges::begin(derived()));
}

template <typename R = D,
typename = std::enable_if_t<
range<const R> && contiguous_iterator<iterator_t<const R>>>>
constexpr auto data() const
{
return ranges::empty(derived())
? nullptr
: std::addressof(*ranges::begin(derived()));
return detail::to_address(ranges::begin(derived()));
}

template <typename R = D,
Expand Down Expand Up @@ -4042,6 +4038,18 @@ inline constexpr char32_t decode_code_point_exhaustive_valid(
}
}

/// Tells whether `cp` is one of the code points carrying the Unicode
/// Pattern_White_Space property.
///
/// @param cp  Code point to classify.
/// @return    `true` for U+0009..U+000D, U+0020, U+0085, U+200E, U+200F,
///            U+2028 and U+2029; `false` for every other value.
inline constexpr bool is_cp_space(char32_t cp) noexcept
{
    switch (cp) {
        // ASCII space characters
        case 0x09:  // CHARACTER TABULATION
        case 0x0a:  // LINE FEED
        case 0x0b:  // LINE TABULATION
        case 0x0c:  // FORM FEED
        case 0x0d:  // CARRIAGE RETURN
        case 0x20:  // SPACE
        // Non-ASCII members of the set
        case 0x85:    // NEXT LINE (NEL)
        case 0x200e:  // LEFT-TO-RIGHT MARK
        case 0x200f:  // RIGHT-TO-LEFT MARK
        case 0x2028:  // LINE SEPARATOR
        case 0x2029:  // PARAGRAPH SEPARATOR
            return true;
        default:
            return false;
    }
}

} // namespace detail

/////////////////////////////////////////////////////////////////
Expand Down Expand Up @@ -4192,6 +4200,7 @@ class basic_scan_buffer<CharT>::forward_iterator {

// Reports whether this iterator has no end pointer set (m_end is null).
// Asserts that m_begin is non-null before answering.
// NOTE(review): presumably a null m_end means m_begin refers to the parent
// buffer rather than to the start of a contiguous range -- confirm against
// the rest of forward_iterator.
bool stores_parent() const
{
    assert(m_begin);
    const bool end_is_unset = (m_end == nullptr);
    return end_is_unset;
}

Expand Down Expand Up @@ -4937,8 +4946,9 @@ class arg_value {
auto& pctx_ref = *static_cast<parse_context_type*>(pctx);
auto& ctx_ref = *static_cast<context_type*>(ctx);

SCN_TRY_ERR(_, s.parse(pctx_ref));
SCN_UNUSED(_);
SCN_TRY_ERR(fmt_it, s.parse(pctx_ref));
pctx_ref.advance_to(fmt_it);

SCN_TRY_ERR(it, s.scan(arg_ref, ctx_ref));
ctx_ref.advance_to(SCN_MOVE(it));

Expand Down Expand Up @@ -7867,7 +7877,7 @@ class format_string_checker {
};

template <typename Source, typename... Args, typename Str>
auto check_format_string(const Str&)
constexpr auto check_format_string(const Str&)
-> std::enable_if_t<!is_compile_string_v<Str>>
{
// TODO: SCN_ENFORCE_COMPILE_STRING?
Expand All @@ -7879,7 +7889,7 @@ auto check_format_string(const Str&)
}

template <typename Source, typename... Args, typename Str>
auto check_format_string(Str format_str)
constexpr auto check_format_string(Str format_str)
-> std::enable_if_t<is_compile_string_v<Str>>
{
using char_type = typename Str::char_type;
Expand Down
4 changes: 2 additions & 2 deletions src/scn/impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -193,15 +193,15 @@ std::string_view::iterator find_classic_space_narrow_fast(
{
return find_classic_impl(
source, [](char ch) { return is_ascii_space(ch); },
[](char32_t cp) { return is_cp_space(cp); });
[](char32_t cp) { return detail::is_cp_space(cp); });
}

std::string_view::iterator find_classic_nonspace_narrow_fast(
std::string_view source)
{
return find_classic_impl(
source, [](char ch) { return !is_ascii_space(ch); },
[](char32_t cp) { return !is_cp_space(cp); });
[](char32_t cp) { return !detail::is_cp_space(cp); });
}

std::string_view::iterator find_nondecimal_digit_narrow_fast(
Expand Down
21 changes: 5 additions & 16 deletions src/scn/impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -1195,18 +1195,6 @@ constexpr auto get_next_code_point_valid(std::basic_string_view<CharT> input)
detail::decode_code_point_exhaustive_valid(input.substr(0, len))};
}

/// Classifies a code point against the Unicode Pattern_White_Space property.
///
/// @param cp  Code point to test.
/// @return    `true` when `cp` is U+0009..U+000D, U+0020, U+0085, U+200E,
///            U+200F, U+2028 or U+2029; otherwise `false`.
constexpr bool is_cp_space(char32_t cp) noexcept
{
    // ASCII space characters: U+0009..U+000D plus U+0020 (SPACE).
    if ((cp >= 0x09 && cp <= 0x0d) || cp == 0x20) {
        return true;
    }
    // NEXT LINE (NEL)
    if (cp == 0x85) {
        return true;
    }
    // LEFT-TO-RIGHT MARK and RIGHT-TO-LEFT MARK
    if (cp == 0x200e || cp == 0x200f) {
        return true;
    }
    // LINE SEPARATOR and PARAGRAPH SEPARATOR
    return cp == 0x2028 || cp == 0x2029;
}

template <typename CharT>
struct is_first_char_space_result {
ranges::iterator_t<std::basic_string_view<CharT>> iterator;
Expand All @@ -1221,7 +1209,7 @@ inline constexpr auto is_first_char_space(std::basic_string_view<CharT> str)
// TODO: optimize
SCN_EXPECT(!str.empty());
auto res = get_next_code_point(str);
return {res.iterator, res.value, is_cp_space(res.value)};
return {res.iterator, res.value, detail::is_cp_space(res.value)};
}

inline constexpr scan_expected<wchar_t> encode_code_point_as_wide_character(
Expand Down Expand Up @@ -2107,7 +2095,7 @@ auto read_until_classic_space(Range range) -> ranges::const_iterator_t<Range>

return read_until_code_point(
ranges::subrange{it, range.end()},
[](char32_t cp) noexcept { return is_cp_space(cp); });
[](char32_t cp) noexcept { return detail::is_cp_space(cp); });
}
}

Expand All @@ -2134,8 +2122,9 @@ auto read_while_classic_space(Range range) -> ranges::const_iterator_t<Range>
ranges::advance(it, seg.size());
}

return read_while_code_point(
range, [](char32_t cp) noexcept { return is_cp_space(cp); });
return read_while_code_point(range, [](char32_t cp) noexcept {
return detail::is_cp_space(cp);
});
}
}

Expand Down
1 change: 1 addition & 0 deletions tests/fuzz/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ add_fuzzer(int)
add_fuzzer(float)
add_fuzzer(string)
add_fuzzer(format)
add_fuzzer(chrono)
add_fuzzer(string_impl)

add_custom_target(scn_fuzz_prepare ALL
Expand Down
61 changes: 61 additions & 0 deletions tests/fuzz/chrono_fuzz.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
// Copyright 2017 Elias Kosunen
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// This file is a part of scnlib:
// https://github.com/eliaskosunen/scnlib

#include "fuzz.h"

#include <scn/chrono.h>

namespace scn::fuzz {
// Runs do_basic_run_for_type once per chrono target type -- std::tm,
// tm_with_tz, then datetime_components -- passing the same `source` and
// `format_strings` to each call.
// NOTE(review): presumably this is the per-fuzzer hook that do_basic_run in
// fuzz.h dispatches to for each source -- confirm against fuzz.h.
template <typename CharT, typename Source>
void do_basic_run_for_source(Source& source,
const format_strings_type<CharT>& format_strings)
{
do_basic_run_for_type<CharT, std::tm>(source, format_strings);
do_basic_run_for_type<CharT, tm_with_tz>(source, format_strings);
do_basic_run_for_type<CharT, datetime_components>(source, format_strings);
}

namespace {
// Drives one fuzz iteration over the raw input bytes.
//
// Inputs that are empty or longer than max_input_bytes are ignored. Otherwise
// the bytes are turned into three views by make_input_views, and each view is
// run through do_basic_run with a fixed set of chrono format strings: the
// narrow view with char formats, the two wide views with wchar_t formats.
void run(const uint8_t* data, size_t size)
{
    if (size == 0 || size > max_input_bytes) {
        return;
    }

    // NOTE(review): judging by the original binding names, the second view is
    // presumably the bytes reinterpreted as wchar_t and the third a transcoded
    // copy -- confirm against make_input_views in fuzz.h.
    auto [narrow_input, wide_reinterpreted, wide_transcoded] =
        make_input_views(data, size);

    const auto& narrow_formats = get_format_strings<char>(
        "{:%T}", "{:%R}", "{:%D}", "{:%F}", "{:%Y-%m-%dT%H:%M:%S%z}", "{:%a}",
        "{:%b}");
    do_basic_run(narrow_input, narrow_formats);

    const auto& wide_formats = get_format_strings<wchar_t>(
        L"{:%T}", L"{:%R}", L"{:%D}", L"{:%F}", L"{:%Y-%m-%dT%H:%M:%S%z}",
        L"{:%a}", L"{:%b}");
    do_basic_run(wide_reinterpreted, wide_formats);

    // The third view is only exercised when it is non-empty.
    if (wide_transcoded.empty()) {
        return;
    }
    do_basic_run(wide_transcoded, wide_formats);
}
} // namespace
} // namespace scn::fuzz

// Fuzzer entry point with C linkage: forwards the input buffer (`data`,
// `size` bytes) to scn::fuzz::run and always returns 0.
extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size)
{
scn::fuzz::run(data, size);
return 0;
}
46 changes: 46 additions & 0 deletions tests/fuzz/dictionaries/chrono.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
"1970"
"2024"
"1"
"12"
"29"
"31"
"12"
"24"
"00"
"0"
"99"
":"
"."
"-"
"+"
"T"
"Z"
"Europe/Helsinki"
"America/New_York"
"GMT"
"UTC"
"EST"
"Monday"
"Tuesday"
"Wednesday"
"Thursday"
"Friday"
"Saturday"
"Sunday"
"January"
"February"
"March"
"April"
"May"
"June"
"July"
"August"
"September"
"October"
"November"
"December"
"am"
"a.m."
"PM"
"P.M."
"%"
1 change: 1 addition & 0 deletions tests/fuzz/seed-corpora/chrono/1
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
2024-08-29T23:41:10+02:00
1 change: 1 addition & 0 deletions tests/fuzz/seed-corpora/chrono/2
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Thursday, Aug 29 2024, 23:41:00, Europe/Helsinki
1 change: 1 addition & 0 deletions tests/fuzz/seed-corpora/chrono/3
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
2024-08-29
1 change: 1 addition & 0 deletions tests/fuzz/seed-corpora/chrono/4
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
23:41
1 change: 1 addition & 0 deletions tests/fuzz/seed-corpora/chrono/5
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
11:41 PM
1 change: 1 addition & 0 deletions tests/fuzz/seed-corpora/chrono/6
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
08/29/24
1 change: 1 addition & 0 deletions tests/unittests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ add_executable(scn_tests
args_test.cpp
buffer_test.cpp
char_test.cpp
chrono_test.cpp
context_test.cpp
custom_type_test.cpp
error_test.cpp
Expand Down
Loading

0 comments on commit 8cabfa8

Please sign in to comment.