Skip to content

Commit

Permalink
fix: json_parse tape failure (#11831)
Browse files Browse the repository at this point in the history
Summary:

SIMDJSON fails with a tape failure when if a ':' immediately follows the end of a \" when parsing even though ':' is outside the range of the input. This can happen if the previous string had a ':' at just the right location as we dont clear the buffer when processing strings. Fixes T210222818 .

Reviewed By: amitkdutta

Differential Revision: D67108827
  • Loading branch information
Krishna Pai authored and facebook-github-bot committed Dec 12, 2024
1 parent b44ffc9 commit b8c0cfa
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 1 deletion.
7 changes: 6 additions & 1 deletion velox/functions/prestosql/JsonFunctions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -234,8 +234,13 @@ class JsonParseFunction : public exec::VectorFunction {
paddedInput_.data(), value.data(), size);
if (maxSize < size) {
paddedInput_.resize(size + simdjson::SIMDJSON_PADDING);
maxSize = size;
}
} else {
// We clear out the buffer since SIMDJSON peeks past the size of the
// string and can throw if a ':' comes after a '"'.
// issue : https://github.com/simdjson/simdjson/issues/2312
memset(paddedInput_.data(), 0, paddedInput_.size());
memcpy(paddedInput_.data(), value.data(), size);
}

Expand Down Expand Up @@ -322,7 +327,7 @@ class JsonParseFunction : public exec::VectorFunction {
// Remove the quotes from the keys before we sort them.
auto af = std::string_view{a.first.data() + 1, a.first.size() - 2};
auto bf = std::string_view{b.first.data() + 1, b.first.size() - 2};
return lessThan(a.first, b.first);
return lessThan(af, bf);
});

jsonViews.push_back(kObjectStart);
Expand Down
13 changes: 13 additions & 0 deletions velox/functions/prestosql/tests/JsonFunctionsTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -280,6 +280,19 @@ TEST_F(JsonFunctionsTest, jsonParse) {
JSON());
velox::test::assertEqualVectors(expected, result);

// ':' are placed below to make parser think its a key and not a value.
// when processing the next string.
auto svData = {
"\"SomeVerylargeStringThatIsUsedAaaBbbService::someSortOfImpressions\""_sv,
"\"SomeBusinessClusterImagesSignal::genValue\""_sv,
"\"SomeVerylargeStringThatIsUsedAaaBbbCc::Service::someSortOfImpressions\""_sv,
"\"SomePreviewUtils::genMediaComponent\""_sv};

data = makeRowVector({makeFlatVector<StringView>(svData)});
expected = makeFlatVector<StringView>(svData, JSON());
result = evaluate("json_parse(c0)", data);
velox::test::assertEqualVectors(expected, result);

data = makeRowVector({makeConstant(R"("apple")", 2)});
result = evaluate("json_parse(c0)", data);
expected = makeFlatVector<StringView>({{R"("apple")", R"("apple")"}}, JSON());
Expand Down

0 comments on commit b8c0cfa

Please sign in to comment.