Skip to content

Commit

Permalink
fix bitmap64 serialization issue
Browse files Browse the repository at this point in the history
  • Loading branch information
zhicwu committed Feb 19, 2023
1 parent 8cf690e commit 1f3ef08
Show file tree
Hide file tree
Showing 4 changed files with 73 additions and 33 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
* potential endless loop when handling batch update error. [#1233](https://github.com/ClickHouse/clickhouse-java/issues/1233)
* exception when deserializing Array(FixedString(2)) from RowBinary. [#1235](https://github.com/ClickHouse/clickhouse-java/issues/1235)
* deserialization failure of Nested data type. [#1240](https://github.com/ClickHouse/clickhouse-java/issues/1240)
* fix 64bit bitmap serialization issue. [#641](https://github.com/ClickHouse/clickhouse-java/issues/641), [#874](https://github.com/ClickHouse/clickhouse-java/issues/874), [#1141](https://github.com/ClickHouse/clickhouse-java/issues/1141)

## 0.4.0, 2023-01-19
### Breaking changes
Expand Down
8 changes: 4 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,18 +13,18 @@ Java libraries for connecting to ClickHouse and processing data in various forma
| API | [JDBC](https://docs.oracle.com/javase/8/docs/technotes/guides/jdbc/) | :white_check_mark: | |
| | [R2DBC](https://r2dbc.io/) | :white_check_mark: | supported since 0.4.0 |
| Query Language | SQL | :white_check_mark: | |
| | [PRQL](https://prql-lang.org/) | :x: | |
| | [GraphQL](https://graphql.org/) | :x: | |
| | [PRQL](https://prql-lang.org/) | :x: | will be available in 0.4.2 |
| | [GraphQL](https://graphql.org/) | :x: | will be available in 0.4.2 |
| Protocol | [HTTP](https://clickhouse.com/docs/en/interfaces/http/) | :white_check_mark: | recommended, defaults to `java.net.HttpURLConnection` and it can be changed to `java.net.http.HttpClient`(unstable) or `Apache HTTP Client 5`. Note that the latter was added in 0.4.0 to support custom socket options. |
| | [gRPC](https://clickhouse.com/docs/en/interfaces/grpc/) | :white_check_mark: | still experimental, works with 22.3+, known to has [issue](https://github.com/ClickHouse/ClickHouse/issues/28671#issuecomment-1087049993) when using LZ4 compression |
| | [gRPC](https://clickhouse.com/docs/en/interfaces/grpc/) | :white_check_mark: | :warning: experimental, works with 22.3+, known to has issue with lz4 compression and may cause high memory usage on server |
| | [TCP/Native](https://clickhouse.com/docs/en/interfaces/tcp/) | :white_check_mark: | `clickhouse-cli-client`(wrapper of ClickHouse native command-line client) was added in 0.3.2-patch10, `clickhouse-tcp-client` will be available in 0.5 |
| | [Local/File](https://clickhouse.com/docs/en/operations/utilities/clickhouse-local/) | :x: | `clickhouse-cli-client` will be enhanced to support `clickhouse-local` |
| Compatibility | Server < 20.7 | :x: | use 0.3.1-patch(or 0.2.6 if you're stuck with JDK 7) |
| | Server >= 20.7 | :white_check_mark: | use 0.3.2 or above. All [active releases](https://github.com/ClickHouse/ClickHouse/pulls?q=is%3Aopen+is%3Apr+label%3Arelease) are supported. |
| Compression | [lz4](https://lz4.github.io/lz4/) | :white_check_mark: | default |
| | [zstd](https://facebook.github.io/zstd/) | :white_check_mark: | supported since 0.4.0, works with ClickHouse 22.10+ |
| Data Format | RowBinary | :white_check_mark: | `RowBinaryWithNamesAndTypes` for query and `RowBinary` for insertion |
| | TabSeparated | :white_check_mark: | Does not support as many data types as RowBinary |
| | TabSeparated | :white_check_mark: | :warning: does not support as many data types as RowBinary |
| Data Type | AggregatedFunction | :x: | :warning: limited to `groupBitmap`; 64bit bitmap was NOT working properly before 0.4.1 |
| | Array(\*) | :white_check_mark: | |
| | Bool | :white_check_mark: | |
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -172,18 +172,14 @@ public void serialize(ByteBuffer buffer) {
try (ByteArrayOutputStream bas = new ByteArrayOutputStream(size)) {
DataOutput out = new DataOutputStream(bas);
try {
// https://github.com/RoaringBitmap/RoaringBitmap/blob/0.9.9/RoaringBitmap/src/main/java/org/roaringbitmap/longlong/Roaring64NavigableMap.java#L1105
rb.serialize(out);
// https://github.com/RoaringBitmap/RoaringBitmap/blob/fd54c0a100629bb578946e2a0bf8b62784878fa8/RoaringBitmap/src/main/java/org/roaringbitmap/longlong/Roaring64NavigableMap.java#L1253
rb.serializePortable(out);
} catch (IOException e) {
throw new IllegalArgumentException("Failed to serialize given bitmap", e);
}

byte[] bytes = bas.toByteArray();
for (int i = 4; i > 0; i--) {
buffer.put(bytes[i]);
}
buffer.putInt(0);
buffer.put(bytes, 5, size - 5);
buffer.put(bytes, 0, size);
} catch (IOException e) {
throw new IllegalStateException("Failed to serialize given bitmap", e);
}
Expand All @@ -196,6 +192,13 @@ public int serializedSizeInBytes() {

@Override
public long serializedSizeInBytesAsLong() {
// no idea why it's implemented this way...
// https://github.com/RoaringBitmap/RoaringBitmap/blob/fd54c0a100629bb578946e2a0bf8b62784878fa8/RoaringBitmap/src/main/java/org/roaringbitmap/longlong/Roaring64NavigableMap.java#L1371-L1380
// TODO completely drop RoaringBitmap dependency
if (Roaring64NavigableMap.SERIALIZATION_MODE != Roaring64NavigableMap.SERIALIZATION_MODE_PORTABLE) {
throw new IllegalStateException(
"Please change Roaring64NavigableMap.SERIALIZATION_MODE to portable first");
}
return rb.serializedSizeInBytes();
}

Expand Down Expand Up @@ -364,20 +367,10 @@ public static ClickHouseBitmap deserialize(DataInputStream in, ClickHouseDataTyp
b.deserialize(flip(newBuffer(len).put(bytes)));
rb = ClickHouseBitmap.wrap(b, innerType);
} else {
// TODO implement a wrapper of DataInput to get rid of byte array here
bytes[0] = (byte) 0; // always unsigned
// read map size in big-endian byte order
for (int i = 4; i > 0; i--) {
bytes[i] = in.readByte();
}
if (in.readByte() != 0 || in.readByte() != 0 || in.readByte() != 0 || in.readByte() != 0) { // NOSONAR
throw new IllegalStateException(
"Not able to deserialize ClickHouseBitmap for too many bitmaps(>" + 0xFFFFFFFFL + ")!");
}
// read the rest
in.readFully(bytes, 5, len - 8);
in.readFully(bytes, 0, len);
Roaring64NavigableMap b = new Roaring64NavigableMap();
b.deserialize(new DataInputStream(new ByteArrayInputStream(bytes)));
// https://github.com/RoaringBitmap/RoaringBitmap/blob/fd54c0a100629bb578946e2a0bf8b62784878fa8/RoaringBitmap/src/main/java/org/roaringbitmap/longlong/Roaring64NavigableMap.java#L1337
b.deserializePortable(new DataInputStream(new ByteArrayInputStream(bytes)));
rb = ClickHouseBitmap.wrap(b, innerType);
}
}
Expand Down Expand Up @@ -564,9 +557,9 @@ public long[] toLongArray() {
public ByteBuffer toByteBuffer() {
ByteBuffer buf;

int cardinality = getCardinality();
if (cardinality <= 32) {
buf = newBuffer(2 + byteLen * cardinality);
long cardinality = getLongCardinality();
if (cardinality <= 32L) {
buf = newBuffer(2 + byteLen * (int) cardinality);
buf.put((byte) 0);
buf.put((byte) cardinality);
if (byteLen == 1) {
Expand Down Expand Up @@ -595,12 +588,7 @@ public ByteBuffer toByteBuffer() {
ClickHouseByteUtils.setVarInt(buf, size);
serialize(buf);
} else { // 64
// 1) deduct one to exclude the leading byte - boolean flag, see below:
// https://github.com/RoaringBitmap/RoaringBitmap/blob/0.9.9/RoaringBitmap/src/main/java/org/roaringbitmap/longlong/Roaring64NavigableMap.java#L1107
// 2) add 4 bytes because CRoaring uses long to store count of 32-bit bitmaps,
// while Java uses int - see
// https://github.com/RoaringBitmap/CRoaring/blob/v0.2.66/cpp/roaring64map.hh#L597
long size = serializedSizeInBytesAsLong() - 1 + 4;
long size = serializedSizeInBytesAsLong();
int varIntSize = ClickHouseByteUtils.getVarLongSize(size);
// TODO add serialize(DataOutput) to handle more
int intSize = (int) size;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,12 +38,14 @@
import com.clickhouse.client.http.config.ClickHouseHttpOption;
import com.clickhouse.data.ClickHouseDataType;
import com.clickhouse.data.ClickHouseValues;
import com.clickhouse.data.value.ClickHouseBitmap;
import com.clickhouse.data.value.ClickHouseDateTimeValue;
import com.clickhouse.data.value.UnsignedByte;
import com.clickhouse.data.value.UnsignedInteger;
import com.clickhouse.data.value.UnsignedLong;
import com.clickhouse.data.value.UnsignedShort;

import org.roaringbitmap.longlong.Roaring64NavigableMap;
import org.testng.Assert;
import org.testng.SkipException;
import org.testng.annotations.DataProvider;
Expand All @@ -65,6 +67,55 @@ private Object[][] getConnectionProperties() {
new Object[] { emptyProps }, new Object[] { sessionProps } };
}

@Test(groups = "integration")
public void testBitmap64() throws SQLException {
Properties props = new Properties();
String sql = "select k,\n"
+ "[ tuple(arraySort(groupUniqArrayIf(n, n > 33)), groupBitmapStateIf(n, n > 33)),\n"
+ " tuple(arraySort(groupUniqArrayIf(n, n < 32)), groupBitmapStateIf(n, n < 32)),\n"
+ " tuple(arraySort(groupUniqArray(n)), groupBitmapState(n)),\n"
+ " tuple(arraySort(groupUniqArray(v)), groupBitmapState(v))\n"
+ "]::Array(Tuple(Array(UInt64), AggregateFunction(groupBitmap, UInt64))) v\n"
+ "from (select 'k' k, (number % 33)::UInt64 as n, (9223372036854775807 + number::Int16)::UInt64 v from numbers(300000))\n"
+ "group by k";
try (ClickHouseConnection conn = newConnection(props);
ClickHouseStatement stmt = conn.createStatement()) {
stmt.execute("drop table if exists test_bitmap64_serde; "
+ "create table test_bitmap64_serde(k String, v Array(Tuple(Array(UInt64), AggregateFunction(groupBitmap, UInt64))))engine=Memory");
try (PreparedStatement ps = conn.prepareStatement("insert into test_bitmap64_serde");
ResultSet rs = stmt.executeQuery(sql)) {
Assert.assertTrue(rs.next());
Assert.assertEquals(rs.getString(1), "k");
ps.setString(1, rs.getString(1));
Object[] values = (Object[]) rs.getObject(2);
ps.setObject(2, values);
Assert.assertEquals(values.length, 4);
for (int i = 0; i < values.length; i++) {
List<?> tuple = (List<?>) values[i];
Assert.assertEquals(tuple.size(), 2);
long[] nums = (long[]) tuple.get(0);
ClickHouseBitmap bitmap = (ClickHouseBitmap) tuple.get(1);
Roaring64NavigableMap bitmap64 = (Roaring64NavigableMap) bitmap.unwrap();
Assert.assertEquals(nums.length, bitmap64.getLongCardinality());
for (int j = 0; j < nums.length; j++) {
Assert.assertTrue(bitmap64.contains(nums[j]), "Bitmap does not contain value: " + nums[j]);
}
}
Assert.assertFalse(rs.next());

// Assert.assertThrows(IllegalStateException.class, () -> ps.executeUpdate());
Roaring64NavigableMap.SERIALIZATION_MODE = Roaring64NavigableMap.SERIALIZATION_MODE_PORTABLE;
ps.executeUpdate();
}

stmt.execute("insert into test_bitmap64_serde\n" + sql);
try (ResultSet rs = stmt.executeQuery("select distinct * from test_bitmap64_serde")) {
Assert.assertTrue(rs.next(), "Should have at least one row");
Assert.assertFalse(rs.next(), "Should have only one unique row");
}
}
}

@Test(groups = "integration")
public void testDialect() throws SQLException {
Properties props = new Properties();
Expand Down

0 comments on commit 1f3ef08

Please sign in to comment.