Skip to content

Commit

Permalink
Add support for regex slash escaping
Browse files Browse the repository at this point in the history
  • Loading branch information
eliaskosunen committed Jan 14, 2024
1 parent eadf64e commit 8653b54
Show file tree
Hide file tree
Showing 4 changed files with 73 additions and 17 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ Changes are in comparison to 2.0.0-beta.
* Specific types (`SCN_DISABLE_TYPE_*`)
* Locales (`SCN_DISABLE_LOCALE`)
* IOStreams (`SCN_DISABLE_IOSTREAM`)
* Regex support (`SCN_DISABLE_REGEX`)
* Floating-point scanning fallbacks (`SCN_DISABLE_FROM_CHARS`, `SCN_DISABLE_STRTOD`)
* These can be useful in some constrained environments, where these facilities are either not available or not used
* Thanks [@cjvaughter (CJ Vaughter)](https://github.com/cjvaughter) for the original implementation in v1 in #70 and #71
Expand Down
63 changes: 48 additions & 15 deletions src/scn/impl/reader/regex_reader.h
Original file line number Diff line number Diff line change
Expand Up @@ -123,16 +123,12 @@ namespace scn {
}
#endif

template <typename CharT, typename Pattern, typename Input>
auto read_regex_string_impl(Pattern pattern,
template <typename CharT, typename Input>
auto read_regex_string_impl(std::basic_string_view<CharT> pattern,
detail::regex_flags flags,
Input input)
-> scan_expected<ranges::iterator_t<Input>>
{
static_assert(
ranges::contiguous_range<Pattern> &&
ranges::borrowed_range<Pattern> &&
std::is_same_v<ranges::range_value_t<Pattern>, CharT>);
static_assert(ranges::contiguous_range<Input> &&
ranges::borrowed_range<Input> &&
std::is_same_v<ranges::range_value_t<Input>, CharT>);
Expand Down Expand Up @@ -248,17 +244,13 @@ namespace scn {
#endif
}

template <typename CharT, typename Pattern, typename Input>
auto read_regex_matches_impl(Pattern pattern,
template <typename CharT, typename Input>
auto read_regex_matches_impl(std::basic_string_view<CharT> pattern,
detail::regex_flags flags,
Input input,
basic_regex_matches<CharT>& value)
-> scan_expected<ranges::iterator_t<Input>>
{
static_assert(
ranges::contiguous_range<Pattern> &&
ranges::borrowed_range<Pattern> &&
std::is_same_v<ranges::range_value_t<Pattern>, CharT>);
static_assert(ranges::contiguous_range<Input> &&
ranges::borrowed_range<Input> &&
std::is_same_v<ranges::range_value_t<Input>, CharT>);
Expand Down Expand Up @@ -456,6 +448,28 @@ namespace scn {

#endif // !SCN_DISABLE_REGEX

// Returns a copy of `pattern` in which every two-character sequence `\/`
// has been collapsed into a single `/`. All other characters (including
// backslashes that are not directly followed by a slash) are copied as-is.
// The scan is a single left-to-right pass: a `/` produced by a replacement
// is never re-examined together with the characters before it.
inline std::string get_unescaped_regex_pattern(std::string_view pattern)
{
    std::string unescaped;
    unescaped.reserve(pattern.size());

    std::size_t cursor = 0;
    while (true) {
        const auto hit = pattern.find("\\/", cursor);
        if (hit == std::string_view::npos) {
            // No more escaped slashes: copy the remaining tail verbatim.
            unescaped.append(pattern.substr(cursor));
            break;
        }
        // Copy everything up to the escape, then emit the bare slash.
        unescaped.append(pattern.substr(cursor, hit - cursor));
        unescaped.push_back('/');
        cursor = hit + 2;
    }
    return unescaped;
}
// Wide-character overload: returns a copy of `pattern` in which every
// two-character sequence `\/` has been collapsed into a single `/`.
// All other characters are copied unchanged, in a single left-to-right
// pass over the input.
inline std::wstring get_unescaped_regex_pattern(
    std::wstring_view pattern)
{
    std::wstring unescaped;
    unescaped.reserve(pattern.size());

    std::size_t cursor = 0;
    while (true) {
        const auto hit = pattern.find(L"\\/", cursor);
        if (hit == std::wstring_view::npos) {
            // No more escaped slashes: copy the remaining tail verbatim.
            unescaped.append(pattern.substr(cursor));
            break;
        }
        // Copy everything up to the escape, then emit the bare slash.
        unescaped.append(pattern.substr(cursor, hit - cursor));
        unescaped.push_back(L'/');
        cursor = hit + 2;
    }
    return unescaped;
}

template <typename SourceCharT>
struct regex_matches_reader
: public reader_base<regex_matches_reader<SourceCharT>,
Expand Down Expand Up @@ -505,14 +519,33 @@ namespace scn {
}

auto input = get_as_contiguous(range);
SCN_TRY(it, read_regex_matches_impl(specs.charset_string,
specs.regexp_flags,
input, value));
SCN_TRY(
it,
impl(input,
specs.type ==
detail::presentation_type::regex_escaped,
specs.charset_string, specs.regexp_flags, value));
return ranges_polyfill::batch_next(
ranges::begin(range),
ranges::distance(input.begin(), it));
}
}

private:
// Forwards to read_regex_matches_impl, writing the matches into `value`.
// When `is_escaped` is set, `pattern` is first run through
// get_unescaped_regex_pattern so that `\/` sequences become plain `/`;
// otherwise the pattern is handed over untouched.
template <typename Range, typename DestCharT>
auto impl(const Range& input,
          bool is_escaped,
          std::basic_string_view<SourceCharT> pattern,
          detail::regex_flags flags,
          basic_regex_matches<DestCharT>& value)
{
    if (!is_escaped) {
        return read_regex_matches_impl(pattern, flags, input, value);
    }

    // Keep the unescaped copy alive for the whole duration of the call.
    const auto unescaped_pattern = get_unescaped_regex_pattern(pattern);
    return read_regex_matches_impl<SourceCharT>(unescaped_pattern, flags,
                                                input, value);
}
};

template <typename CharT>
Expand Down
19 changes: 17 additions & 2 deletions src/scn/impl/reader/string_reader.h
Original file line number Diff line number Diff line change
Expand Up @@ -495,9 +495,12 @@ namespace scn {
break;

case detail::presentation_type::regex:
case detail::presentation_type::regex_escaped:
m_type = reader_type::regex;
break;

case detail::presentation_type::regex_escaped:
m_type = reader_type::regex_escaped;
break;
}

SCN_CLANG_POP // -Wswitch-enum, -Wcovered-switch-default
Expand Down Expand Up @@ -530,7 +533,13 @@ namespace scn {
}

protected:
enum class reader_type { word, character, character_set, regex };
// Reading strategy selected from the presentation type of the format specs.
enum class reader_type {
word,
character,
character_set,
// Regex read where the pattern from the specs is used as given.
regex,
// Regex read where the pattern is passed through
// get_unescaped_regex_pattern (turning `\/` into `/`) before use.
regex_escaped,
};

template <typename Range, typename Value>
scan_expected<simple_borrowed_iterator_t<Range>> read_impl(
Expand Down Expand Up @@ -559,6 +568,12 @@ namespace scn {
return regex_string_reader_impl<SourceCharT>{}.read(
SCN_FWD(range), specs.charset_string,
specs.regexp_flags, value);

case reader_type::regex_escaped:
return regex_string_reader_impl<SourceCharT>{}.read(
SCN_FWD(range),
get_unescaped_regex_pattern(specs.charset_string),
specs.regexp_flags, value);
#endif

default:
Expand Down
7 changes: 7 additions & 0 deletions tests/unittests/regex_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -269,4 +269,11 @@ TEST(RegexTest, NoCaseAndNoCaptureFlagMatches)
testing::Property(&scn::regex_match::get, "FooBar123"sv))));
}

TEST(RegexTest, EscapedSlashInPattern) {
    // The pattern contains an escaped slash (`\/`), which is expected to
    // match the literal `/` in the input.
    auto result =
        scn::scan<std::string_view>("foo/bar", "{:/[a-z]+\\/[a-z]+/}");
    ASSERT_TRUE(result);
    // The whole input must have been consumed by the match.
    EXPECT_TRUE(result->range().empty());
    EXPECT_THAT(result->value(), "foo/bar");
}

#endif // !SCN_DISABLE_REGEX

0 comments on commit 8653b54

Please sign in to comment.