diff --git a/CHANGELOG.md b/CHANGELOG.md index d9d254fc9..cf833ce44 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,7 @@ * potential endless loop when handling batch update error. [#1233](https://github.com/ClickHouse/clickhouse-java/issues/1233) * exception when deserializing Array(FixedString(2)) from RowBinary. [#1235](https://github.com/ClickHouse/clickhouse-java/issues/1235) * deserialization failure of Nested data type. [#1240](https://github.com/ClickHouse/clickhouse-java/issues/1240) +* fix 64bit bitmap serialization issue. [#641](https://github.com/ClickHouse/clickhouse-java/issues/641), [#874](https://github.com/ClickHouse/clickhouse-java/issues/874), [#1141](https://github.com/ClickHouse/clickhouse-java/issues/1141) ## 0.4.0, 2023-01-19 ### Breaking changes diff --git a/README.md b/README.md index 07ccfd0af..c69190ebb 100644 --- a/README.md +++ b/README.md @@ -13,10 +13,10 @@ Java libraries for connecting to ClickHouse and processing data in various forma | API | [JDBC](https://docs.oracle.com/javase/8/docs/technotes/guides/jdbc/) | :white_check_mark: | | | | [R2DBC](https://r2dbc.io/) | :white_check_mark: | supported since 0.4.0 | | Query Language | SQL | :white_check_mark: | | -| | [PRQL](https://prql-lang.org/) | :x: | | -| | [GraphQL](https://graphql.org/) | :x: | | +| | [PRQL](https://prql-lang.org/) | :x: | will be available in 0.4.2 | +| | [GraphQL](https://graphql.org/) | :x: | will be available in 0.4.2 | | Protocol | [HTTP](https://clickhouse.com/docs/en/interfaces/http/) | :white_check_mark: | recommended, defaults to `java.net.HttpURLConnection` and it can be changed to `java.net.http.HttpClient`(unstable) or `Apache HTTP Client 5`. Note that the latter was added in 0.4.0 to support custom socket options. | -| | [gRPC](https://clickhouse.com/docs/en/interfaces/grpc/) | :white_check_mark: | still experimental, works with 22.3+, known to has [issue](https://github.com/ClickHouse/ClickHouse/issues/28671#issuecomment-1087049993) when using LZ4 compression | +| | [gRPC](https://clickhouse.com/docs/en/interfaces/grpc/) | :white_check_mark: | :warning: experimental, works with 22.3+, known to has issue with lz4 compression and may cause high memory usage on server | | | [TCP/Native](https://clickhouse.com/docs/en/interfaces/tcp/) | :white_check_mark: | `clickhouse-cli-client`(wrapper of ClickHouse native command-line client) was added in 0.3.2-patch10, `clickhouse-tcp-client` will be available in 0.5 | | | [Local/File](https://clickhouse.com/docs/en/operations/utilities/clickhouse-local/) | :x: | `clickhouse-cli-client` will be enhanced to support `clickhouse-local` | | Compatibility | Server < 20.7 | :x: | use 0.3.1-patch(or 0.2.6 if you're stuck with JDK 7) | @@ -24,7 +24,7 @@ Java libraries for connecting to ClickHouse and processing data in various forma | Compression | [lz4](https://lz4.github.io/lz4/) | :white_check_mark: | default | | | [zstd](https://facebook.github.io/zstd/) | :white_check_mark: | supported since 0.4.0, works with ClickHouse 22.10+ | | Data Format | RowBinary | :white_check_mark: | `RowBinaryWithNamesAndTypes` for query and `RowBinary` for insertion | -| | TabSeparated | :white_check_mark: | Does not support as many data types as RowBinary | +| | TabSeparated | :white_check_mark: | :warning: does not support as many data types as RowBinary | | Data Type | AggregatedFunction | :x: | :warning: limited to `groupBitmap`; 64bit bitmap was NOT working properly before 0.4.1 | | | Array(\*) | :white_check_mark: | | | | Bool | :white_check_mark: | | diff --git a/clickhouse-data/src/main/java/com/clickhouse/data/value/ClickHouseBitmap.java b/clickhouse-data/src/main/java/com/clickhouse/data/value/ClickHouseBitmap.java index 5eb578850..e57247dec 100644 --- a/clickhouse-data/src/main/java/com/clickhouse/data/value/ClickHouseBitmap.java +++ b/clickhouse-data/src/main/java/com/clickhouse/data/value/ClickHouseBitmap.java @@ -172,18 +172,14 @@ public void serialize(ByteBuffer buffer) { try (ByteArrayOutputStream bas = new ByteArrayOutputStream(size)) { DataOutput out = new DataOutputStream(bas); try { - // https://github.com/RoaringBitmap/RoaringBitmap/blob/0.9.9/RoaringBitmap/src/main/java/org/roaringbitmap/longlong/Roaring64NavigableMap.java#L1105 - rb.serialize(out); + // https://github.com/RoaringBitmap/RoaringBitmap/blob/fd54c0a100629bb578946e2a0bf8b62784878fa8/RoaringBitmap/src/main/java/org/roaringbitmap/longlong/Roaring64NavigableMap.java#L1253 + rb.serializePortable(out); } catch (IOException e) { throw new IllegalArgumentException("Failed to serialize given bitmap", e); } byte[] bytes = bas.toByteArray(); - for (int i = 4; i > 0; i--) { - buffer.put(bytes[i]); - } - buffer.putInt(0); - buffer.put(bytes, 5, size - 5); + buffer.put(bytes, 0, size); } catch (IOException e) { throw new IllegalStateException("Failed to serialize given bitmap", e); } @@ -196,6 +192,13 @@ public int serializedSizeInBytes() { @Override public long serializedSizeInBytesAsLong() { + // no idea why it's implemented this way... + // https://github.com/RoaringBitmap/RoaringBitmap/blob/fd54c0a100629bb578946e2a0bf8b62784878fa8/RoaringBitmap/src/main/java/org/roaringbitmap/longlong/Roaring64NavigableMap.java#L1371-L1380 + // TODO completely drop RoaringBitmap dependency + if (Roaring64NavigableMap.SERIALIZATION_MODE != Roaring64NavigableMap.SERIALIZATION_MODE_PORTABLE) { + throw new IllegalStateException( + "Please change Roaring64NavigableMap.SERIALIZATION_MODE to portable first"); + } return rb.serializedSizeInBytes(); } @@ -364,20 +367,10 @@ public static ClickHouseBitmap deserialize(DataInputStream in, ClickHouseDataTyp b.deserialize(flip(newBuffer(len).put(bytes))); rb = ClickHouseBitmap.wrap(b, innerType); } else { - // TODO implement a wrapper of DataInput to get rid of byte array here - bytes[0] = (byte) 0; // always unsigned - // read map size in big-endian byte order - for (int i = 4; i > 0; i--) { - bytes[i] = in.readByte(); - } - if (in.readByte() != 0 || in.readByte() != 0 || in.readByte() != 0 || in.readByte() != 0) { // NOSONAR - throw new IllegalStateException( - "Not able to deserialize ClickHouseBitmap for too many bitmaps(>" + 0xFFFFFFFFL + ")!"); - } - // read the rest - in.readFully(bytes, 5, len - 8); + in.readFully(bytes, 0, len); Roaring64NavigableMap b = new Roaring64NavigableMap(); - b.deserialize(new DataInputStream(new ByteArrayInputStream(bytes))); + // https://github.com/RoaringBitmap/RoaringBitmap/blob/fd54c0a100629bb578946e2a0bf8b62784878fa8/RoaringBitmap/src/main/java/org/roaringbitmap/longlong/Roaring64NavigableMap.java#L1337 + b.deserializePortable(new DataInputStream(new ByteArrayInputStream(bytes))); rb = ClickHouseBitmap.wrap(b, innerType); } } @@ -564,9 +557,9 @@ public long[] toLongArray() { public ByteBuffer toByteBuffer() { ByteBuffer buf; - int cardinality = getCardinality(); - if (cardinality <= 32) { - buf = newBuffer(2 + byteLen * cardinality); + long cardinality = getLongCardinality(); + if (cardinality <= 32L) { + buf = newBuffer(2 + byteLen * (int) cardinality); buf.put((byte) 0); buf.put((byte) cardinality); if (byteLen == 1) { @@ -595,12 +588,7 @@ public ByteBuffer toByteBuffer() { ClickHouseByteUtils.setVarInt(buf, size); serialize(buf); } else { // 64 - // 1) deduct one to exclude the leading byte - boolean flag, see below: - // https://github.com/RoaringBitmap/RoaringBitmap/blob/0.9.9/RoaringBitmap/src/main/java/org/roaringbitmap/longlong/Roaring64NavigableMap.java#L1107 - // 2) add 4 bytes because CRoaring uses long to store count of 32-bit bitmaps, - // while Java uses int - see - // https://github.com/RoaringBitmap/CRoaring/blob/v0.2.66/cpp/roaring64map.hh#L597 - long size = serializedSizeInBytesAsLong() - 1 + 4; + long size = serializedSizeInBytesAsLong(); int varIntSize = ClickHouseByteUtils.getVarLongSize(size); // TODO add serialize(DataOutput) to handle more int intSize = (int) size; diff --git a/clickhouse-jdbc/src/test/java/com/clickhouse/jdbc/ClickHouseStatementTest.java b/clickhouse-jdbc/src/test/java/com/clickhouse/jdbc/ClickHouseStatementTest.java index 0c647a5ac..4449848d4 100644 --- a/clickhouse-jdbc/src/test/java/com/clickhouse/jdbc/ClickHouseStatementTest.java +++ b/clickhouse-jdbc/src/test/java/com/clickhouse/jdbc/ClickHouseStatementTest.java @@ -38,12 +38,14 @@ import com.clickhouse.client.http.config.ClickHouseHttpOption; import com.clickhouse.data.ClickHouseDataType; import com.clickhouse.data.ClickHouseValues; +import com.clickhouse.data.value.ClickHouseBitmap; import com.clickhouse.data.value.ClickHouseDateTimeValue; import com.clickhouse.data.value.UnsignedByte; import com.clickhouse.data.value.UnsignedInteger; import com.clickhouse.data.value.UnsignedLong; import com.clickhouse.data.value.UnsignedShort; +import org.roaringbitmap.longlong.Roaring64NavigableMap; import org.testng.Assert; import org.testng.SkipException; import org.testng.annotations.DataProvider; @@ -65,6 +67,55 @@ private Object[][] getConnectionProperties() { new Object[] { emptyProps }, new Object[] { sessionProps } }; } + @Test(groups = "integration") + public void testBitmap64() throws SQLException { + Properties props = new Properties(); + String sql = "select k,\n" + + "[ tuple(arraySort(groupUniqArrayIf(n, n > 33)), groupBitmapStateIf(n, n > 33)),\n" + + " tuple(arraySort(groupUniqArrayIf(n, n < 32)), groupBitmapStateIf(n, n < 32)),\n" + + " tuple(arraySort(groupUniqArray(n)), groupBitmapState(n)),\n" + + " tuple(arraySort(groupUniqArray(v)), groupBitmapState(v))\n" + + "]::Array(Tuple(Array(UInt64), AggregateFunction(groupBitmap, UInt64))) v\n" + + "from (select 'k' k, (number % 33)::UInt64 as n, (9223372036854775807 + number::Int16)::UInt64 v from numbers(300000))\n" + + "group by k"; + try (ClickHouseConnection conn = newConnection(props); + ClickHouseStatement stmt = conn.createStatement()) { + stmt.execute("drop table if exists test_bitmap64_serde; " + + "create table test_bitmap64_serde(k String, v Array(Tuple(Array(UInt64), AggregateFunction(groupBitmap, UInt64))))engine=Memory"); + try (PreparedStatement ps = conn.prepareStatement("insert into test_bitmap64_serde"); + ResultSet rs = stmt.executeQuery(sql)) { + Assert.assertTrue(rs.next()); + Assert.assertEquals(rs.getString(1), "k"); + ps.setString(1, rs.getString(1)); + Object[] values = (Object[]) rs.getObject(2); + ps.setObject(2, values); + Assert.assertEquals(values.length, 4); + for (int i = 0; i < values.length; i++) { + List tuple = (List) values[i]; + Assert.assertEquals(tuple.size(), 2); + long[] nums = (long[]) tuple.get(0); + ClickHouseBitmap bitmap = (ClickHouseBitmap) tuple.get(1); + Roaring64NavigableMap bitmap64 = (Roaring64NavigableMap) bitmap.unwrap(); + Assert.assertEquals(nums.length, bitmap64.getLongCardinality()); + for (int j = 0; j < nums.length; j++) { + Assert.assertTrue(bitmap64.contains(nums[j]), "Bitmap does not contain value: " + nums[j]); + } + } + Assert.assertFalse(rs.next()); + + // Assert.assertThrows(IllegalStateException.class, () -> ps.executeUpdate()); + Roaring64NavigableMap.SERIALIZATION_MODE = Roaring64NavigableMap.SERIALIZATION_MODE_PORTABLE; + ps.executeUpdate(); + } + + stmt.execute("insert into test_bitmap64_serde\n" + sql); + try (ResultSet rs = stmt.executeQuery("select distinct * from test_bitmap64_serde")) { + Assert.assertTrue(rs.next(), "Should have at least one row"); + Assert.assertFalse(rs.next(), "Should have only one unique row"); + } + } + } + @Test(groups = "integration") public void testDialect() throws SQLException { Properties props = new Properties();