diff --git a/velox/functions/prestosql/aggregates/PrestoHasher.cpp b/velox/functions/prestosql/aggregates/PrestoHasher.cpp index abbcaca0ed28..0df9c7756ebd 100644 --- a/velox/functions/prestosql/aggregates/PrestoHasher.cpp +++ b/velox/functions/prestosql/aggregates/PrestoHasher.cpp @@ -21,12 +21,17 @@ #include #include "velox/functions/lib/RowsTranslationUtil.h" +#include "velox/functions/prestosql/types/IPAddressType.h" #include "velox/functions/prestosql/types/TimestampWithTimeZoneType.h" namespace facebook::velox::aggregate { namespace { +FOLLY_ALWAYS_INLINE uint64_t hashBuffer(const uint8_t* data, size_t size) { + return XXH64(data, size, 0); +} + template FOLLY_ALWAYS_INLINE int64_t hashInteger(const T& value) { return XXH64_round(0, value); @@ -171,7 +176,14 @@ FOLLY_ALWAYS_INLINE void PrestoHasher::hash( const SelectivityVector& rows, BufferPtr& hashes) { applyHashFunction(rows, *vector_.get(), hashes, [&](auto row) { + auto type = vector_->base()->type(); auto value = vector_->valueAt(row); + + if (isIPAddressType(type)) { + auto byteArray = ipaddress::toIPv6ByteArray(value); + return hashBuffer(byteArray.data(), byteArray.size()); + } + // Presto Java UnscaledDecimal128 representation uses signed magnitude // representation. Only negative values differ in this representation. // The processing here is mainly for the convenience of hash computation. diff --git a/velox/functions/prestosql/aggregates/tests/PrestoHasherTest.cpp b/velox/functions/prestosql/aggregates/tests/PrestoHasherTest.cpp index 71246eed6653..6b1c004b9283 100644 --- a/velox/functions/prestosql/aggregates/tests/PrestoHasherTest.cpp +++ b/velox/functions/prestosql/aggregates/tests/PrestoHasherTest.cpp @@ -17,6 +17,7 @@ #include #include "velox/functions/prestosql/aggregates/PrestoHasher.h" +#include "velox/functions/prestosql/types/IPAddressType.h" #include "velox/functions/prestosql/types/TimestampWithTimeZoneType.h" #include "velox/type/tz/TimeZoneMap.h" #include "velox/vector/tests/utils/VectorTestBase.h" @@ -430,4 +431,22 @@ TEST_F(PrestoHasherTest, timestampWithTimezone) { {-3461822077982309083, -3461822077982309083, -8497890125589769483, 0}); } +TEST_F(PrestoHasherTest, ipAddress) { + auto makeIpAdressFromString = [](const std::string& ipAddr) -> int128_t { + auto ret = ipaddress::tryGetIPv6asInt128FromString(ipAddr); + return ret.value(); + }; + + auto ipAddresses = makeNullableFlatVector( + std::vector>{ + makeIpAdressFromString("2001:db8::ff00:42:8329"), + makeIpAdressFromString("192.168.1.1"), + makeIpAdressFromString("::ffff:1.2.3.4"), + std::nullopt}, + IPADDRESS()); + + assertHash( + ipAddresses, + {-4694347089639306204, 2704428192845283049, -1632332718929005309, 0}); +} } // namespace facebook::velox::aggregate::test diff --git a/velox/functions/prestosql/types/IPAddressType.h b/velox/functions/prestosql/types/IPAddressType.h index 9c6c880f9e82..8c5e809a989e 100644 --- a/velox/functions/prestosql/types/IPAddressType.h +++ b/velox/functions/prestosql/types/IPAddressType.h @@ -28,6 +28,15 @@ constexpr int kIPV4ToV6FFIndex = 10; constexpr int kIPV4ToV6Index = 12; constexpr int kIPAddressBytes = 16; +inline folly::ByteArray16 toIPv6ByteArray(const int128_t& ipAddr) { + folly::ByteArray16 bytes{{0}}; + memcpy(bytes.data(), &ipAddr, sizeof(ipAddr)); + // Reverse because the velox is always on little endian system + // and the byte array needs to be big endian (network byte order) + std::reverse(bytes.begin(), bytes.end()); + return bytes; +} + inline folly::Expected tryGetIPv6asInt128FromString(const std::string& ipAddressStr) { auto maybeIp = folly::IPAddress::tryFromString(ipAddressStr); @@ -56,8 +65,8 @@ class IPAddressType : public HugeintType { } int32_t compare(const int128_t& left, const int128_t& right) const override { - const auto leftAddrBytes = toIPv6ByteArray(left); - const auto rightAddrBytes = toIPv6ByteArray(right); + const auto leftAddrBytes = ipaddress::toIPv6ByteArray(left); + const auto rightAddrBytes = ipaddress::toIPv6ByteArray(right); return memcmp( leftAddrBytes.begin(), rightAddrBytes.begin(), @@ -87,16 +96,6 @@ class IPAddressType : public HugeintType { obj["type"] = name(); return obj; } - - private: - static folly::ByteArray16 toIPv6ByteArray(const int128_t& ipAddr) { - folly::ByteArray16 bytes{{0}}; - memcpy(bytes.data(), &ipAddr, sizeof(ipAddr)); - // Reverse because the velox is always on little endian system - // and the byte array needs to be big endian (network byte order) - std::reverse(bytes.begin(), bytes.end()); - return bytes; - } }; FOLLY_ALWAYS_INLINE bool isIPAddressType(const TypePtr& type) {