From b8c0cfaba6d78975fe9958600ebd347ac65b5df8 Mon Sep 17 00:00:00 2001 From: Krishna Pai Date: Wed, 11 Dec 2024 16:12:10 -0800 Subject: [PATCH] fix: json_parse tape failure (#11831) Summary: SIMDJSON fails with a tape failure when if a ':' immediately follows the end of a \" when parsing even though ':' is outside the range of the input. This can happen if the previous string had a ':' at just the right location as we dont clear the buffer when processing strings. Fixes T210222818 . Reviewed By: amitkdutta Differential Revision: D67108827 --- velox/functions/prestosql/JsonFunctions.cpp | 7 ++++++- .../functions/prestosql/tests/JsonFunctionsTest.cpp | 13 +++++++++++++ 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/velox/functions/prestosql/JsonFunctions.cpp b/velox/functions/prestosql/JsonFunctions.cpp index 078fe35e3fc9..3df47a7dd39e 100644 --- a/velox/functions/prestosql/JsonFunctions.cpp +++ b/velox/functions/prestosql/JsonFunctions.cpp @@ -234,8 +234,13 @@ class JsonParseFunction : public exec::VectorFunction { paddedInput_.data(), value.data(), size); if (maxSize < size) { paddedInput_.resize(size + simdjson::SIMDJSON_PADDING); + maxSize = size; } } else { + // We clear out the buffer since SIMDJSON peeks past the size of the + // string and can throw if a ':' comes after a '"'. + // issue : https://github.com/simdjson/simdjson/issues/2312 + memset(paddedInput_.data(), 0, paddedInput_.size()); memcpy(paddedInput_.data(), value.data(), size); } @@ -322,7 +327,7 @@ class JsonParseFunction : public exec::VectorFunction { // Remove the quotes from the keys before we sort them. auto af = std::string_view{a.first.data() + 1, a.first.size() - 2}; auto bf = std::string_view{b.first.data() + 1, b.first.size() - 2}; - return lessThan(a.first, b.first); + return lessThan(af, bf); }); jsonViews.push_back(kObjectStart); diff --git a/velox/functions/prestosql/tests/JsonFunctionsTest.cpp b/velox/functions/prestosql/tests/JsonFunctionsTest.cpp index 9bdfa9581cf8..fa0bd7057d28 100644 --- a/velox/functions/prestosql/tests/JsonFunctionsTest.cpp +++ b/velox/functions/prestosql/tests/JsonFunctionsTest.cpp @@ -280,6 +280,19 @@ TEST_F(JsonFunctionsTest, jsonParse) { JSON()); velox::test::assertEqualVectors(expected, result); + // ':' are placed below to make parser think its a key and not a value. + // when processing the next string. + auto svData = { + "\"SomeVerylargeStringThatIsUsedAaaBbbService::someSortOfImpressions\""_sv, + "\"SomeBusinessClusterImagesSignal::genValue\""_sv, + "\"SomeVerylargeStringThatIsUsedAaaBbbCc::Service::someSortOfImpressions\""_sv, + "\"SomePreviewUtils::genMediaComponent\""_sv}; + + data = makeRowVector({makeFlatVector(svData)}); + expected = makeFlatVector(svData, JSON()); + result = evaluate("json_parse(c0)", data); + velox::test::assertEqualVectors(expected, result); + data = makeRowVector({makeConstant(R"("apple")", 2)}); result = evaluate("json_parse(c0)", data); expected = makeFlatVector({{R"("apple")", R"("apple")"}}, JSON());