From 36077e9773a4c8ece41ee9190e80e060686498fd Mon Sep 17 00:00:00 2001 From: Laksh Singla Date: Fri, 8 Sep 2023 03:03:29 +0530 Subject: [PATCH] remove todos, add javadocs --- .../druid/frame/field/DoubleFieldWriter.java | 2 +- .../druid/frame/field/FloatFieldWriter.java | 2 +- .../druid/frame/field/LongFieldWriter.java | 2 +- .../frame/field/NumericArrayFieldReader.java | 5 +- .../frame/field/NumericArrayFieldWriter.java | 11 ++-- .../druid/frame/field/NumericFieldWriter.java | 53 +++++++++++++++++-- .../field/NumericFieldWriterFactory.java | 7 ++- .../druid/frame/field/TransformUtils.java | 6 +++ .../apache/druid/frame/read/FrameReader.java | 16 ++++-- .../field/DoubleArrayFieldReaderTest.java | 1 - .../field/FloatArrayFieldReaderTest.java | 1 - .../frame/field/LongArrayFieldReaderTest.java | 1 - .../druid/frame/field/TransformUtilsTest.java | 1 - .../druid/frame/write/FrameWriterTest.java | 12 +++-- 14 files changed, 95 insertions(+), 25 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/frame/field/DoubleFieldWriter.java b/processing/src/main/java/org/apache/druid/frame/field/DoubleFieldWriter.java index a68827853e54..0fb08d3098ce 100644 --- a/processing/src/main/java/org/apache/druid/frame/field/DoubleFieldWriter.java +++ b/processing/src/main/java/org/apache/druid/frame/field/DoubleFieldWriter.java @@ -48,7 +48,7 @@ private DoubleFieldWriter(final BaseDoubleColumnValueSelector selector, final bo } @Override - public int getNumericSize() + public int getNumericSizeBytes() { return Double.BYTES; } diff --git a/processing/src/main/java/org/apache/druid/frame/field/FloatFieldWriter.java b/processing/src/main/java/org/apache/druid/frame/field/FloatFieldWriter.java index 11ddeb396342..6f11ebc0ab16 100644 --- a/processing/src/main/java/org/apache/druid/frame/field/FloatFieldWriter.java +++ b/processing/src/main/java/org/apache/druid/frame/field/FloatFieldWriter.java @@ -54,7 +54,7 @@ public void close() } @Override - public int getNumericSize() + public int getNumericSizeBytes() { return Float.BYTES; } diff --git a/processing/src/main/java/org/apache/druid/frame/field/LongFieldWriter.java b/processing/src/main/java/org/apache/druid/frame/field/LongFieldWriter.java index 4a68209e2014..db484c12a33b 100644 --- a/processing/src/main/java/org/apache/druid/frame/field/LongFieldWriter.java +++ b/processing/src/main/java/org/apache/druid/frame/field/LongFieldWriter.java @@ -51,7 +51,7 @@ private LongFieldWriter(final BaseLongColumnValueSelector selector, final boolea } @Override - public int getNumericSize() + public int getNumericSizeBytes() { return Long.BYTES; } diff --git a/processing/src/main/java/org/apache/druid/frame/field/NumericArrayFieldReader.java b/processing/src/main/java/org/apache/druid/frame/field/NumericArrayFieldReader.java index 55d176d8bd62..9c6a2ab08b9d 100644 --- a/processing/src/main/java/org/apache/druid/frame/field/NumericArrayFieldReader.java +++ b/processing/src/main/java/org/apache/druid/frame/field/NumericArrayFieldReader.java @@ -39,8 +39,7 @@ public DimensionSelector makeDimensionSelector( @Nullable ExtractionFn extractionFn ) { - // TODO(laksh): Should I throw an exception here - return DimensionSelector.nilSelector(); + throw DruidException.defensive("Cannot call makeDimensionSelector on field of type ARRAY"); } @Override @@ -159,7 +158,7 @@ private void updateCurrentArray(final long fieldPosition) return; } - // TODO(laksh): add comment about position < limit check + // Adding a check here to prevent the position from potentially overflowing if (position < limit) { position++; } diff --git a/processing/src/main/java/org/apache/druid/frame/field/NumericArrayFieldWriter.java b/processing/src/main/java/org/apache/druid/frame/field/NumericArrayFieldWriter.java index 3cbb2471a560..af81b20b0b3f 100644 --- a/processing/src/main/java/org/apache/druid/frame/field/NumericArrayFieldWriter.java +++ b/processing/src/main/java/org/apache/druid/frame/field/NumericArrayFieldWriter.java @@ -121,7 +121,12 @@ public void inspectRuntimeShape(RuntimeShapeInspector inspector) @Override public boolean isNull() { - // TODO(laksh): Add a comment why NullHandling.replaceWithDefault() is not required here + // Arrays preserve the individual element's nullity when they are written and read. + // Therefore, when working with SQL incompatible mode, [7, null] won't change to [7, 0] when written to and + // read from the underlying serialization (as compared with the primitives). Therefore, + // even when NullHandling.replaceWithDefault() is true we need to write null as is, and not convert it to their + // default value when writing the array. Therefore, the check is `getObject() == null` ignoring the value of + // `NullHandling.replaceWithDefaul()`. return getObject() == null; } @@ -141,7 +146,7 @@ public Class classOfObject() NumericFieldWriter writer = writerFactory.get(columnValueSelector); - int requiredSize = Byte.BYTES + (writer.getNumericSize() + Byte.BYTES) * list.size() + Byte.BYTES; + int requiredSize = Byte.BYTES + (writer.getNumericSizeBytes() + Byte.BYTES) * list.size() + Byte.BYTES; if (requiredSize > maxSize) { return -1; @@ -157,7 +162,7 @@ public Class classOfObject() position + offset, maxSize - offset ); - offset += Byte.BYTES + writer.getNumericSize(); + offset += Byte.BYTES + writer.getNumericSizeBytes(); } memory.putByte(position + offset, ARRAY_TERMINATOR); diff --git a/processing/src/main/java/org/apache/druid/frame/field/NumericFieldWriter.java b/processing/src/main/java/org/apache/druid/frame/field/NumericFieldWriter.java index bcf47af4080c..4dcdb096a369 100644 --- a/processing/src/main/java/org/apache/druid/frame/field/NumericFieldWriter.java +++ b/processing/src/main/java/org/apache/druid/frame/field/NumericFieldWriter.java @@ -22,14 +22,46 @@ import org.apache.datasketches.memory.WritableMemory; import org.apache.druid.segment.BaseNullableColumnValueSelector; -// TODO(laksh): Add comment here +/** + * FieldWriter for numeric datatypes. The parent class does the null handling for the underlying data, while + * the individual subclasses write the individual element (long, float or double type). This also allows for a clean + * reuse while creating {@link NumericArrayFieldWriter} + *

+ * Indicator bytes for denoting whether the element is null or not null changes depending on whether the writer is used + * to write the data for individual value (like LONG) or for an element of an array (like ARRAY). This is because + * array support for the numeric types was added later and by then the field writers for individual fields were using + * 0x00 to denote the null byte, which is reserved for denoting the array end when we are writing the elements as part + * of the array instead. (0x00 is used for array end because it helps in preserving the byte comparison property of the + * numeric array field writers). + *

+ * Therefore, to preserve backward and forward compatibility, the individual element's writers were left unchanged, + * while the array's element's writers used 0x01 and 0x02 to denote null and non-null byte respectively + */ public abstract class NumericFieldWriter implements FieldWriter { + /** + * Indicator byte denoting that the numeric value succeeding it is null. This is used in the primitive + * writers. NULL_BYTE < NOT_NULL_BYTE to preserve the ordering while doing byte comparison + */ public static final byte NULL_BYTE = 0x00; + + /** + * Indicator byte denoting that the numeric value succeeding it is not null. This is used in the primitive + * writers + */ public static final byte NOT_NULL_BYTE = 0x01; - // TODO(laksh): Add comment here + /** + * Indicator byte denoting that the numeric value succeeding it is null. This is used while writing the individual + * elements writers of an array. ARRAY_NULL_BYTE < ARRAY_NOT_NULL_BYTE to preserve the ordering while doing byte + * comparison + */ public static final byte ARRAY_NULL_BYTE = 0x01; + + /** + * Indicator byte denoting that the numeric value succeeding it is not null. This is used while writing the individual + * elements writers of an array + */ public static final byte ARRAY_NOT_NULL_BYTE = 0x02; private final BaseNullableColumnValueSelector selector; @@ -54,12 +86,13 @@ public NumericFieldWriter( @Override public long writeTo(WritableMemory memory, long position, long maxSize) { - int size = getNumericSize() + Byte.BYTES; + int size = getNumericSizeBytes() + Byte.BYTES; if (maxSize < size) { return -1; } + // Using isNull() since this is a primitive type if (selector.isNull()) { memory.putByte(position, nullIndicatorByte); writeNullToMemory(memory, position + Byte.BYTES); @@ -77,9 +110,21 @@ public void close() // Nothing to do } - public abstract int getNumericSize(); + /** + * @return The size in bytes of the numeric datatype that the implementation of this writer occupies + */ + public abstract int getNumericSizeBytes(); + /** + * Writes the value pointed by the selector to memory. The caller should ensure that the selector gives out the + * correct primitive type + */ public abstract void writeSelectorToMemory(WritableMemory memory, long position); + /** + * Writes the default value for the type to the memory. For long, it is 0L, for double, it is 0.0d etc. Useful mainly + * when the SQL incompatible mode is turned off, and maintains the fact that the size of the numeric field written + * doesn't vary irrespective of whether the value is null + */ public abstract void writeNullToMemory(WritableMemory memory, long position); } diff --git a/processing/src/main/java/org/apache/druid/frame/field/NumericFieldWriterFactory.java b/processing/src/main/java/org/apache/druid/frame/field/NumericFieldWriterFactory.java index 5070f2019508..a7ae47c91f4e 100644 --- a/processing/src/main/java/org/apache/druid/frame/field/NumericFieldWriterFactory.java +++ b/processing/src/main/java/org/apache/druid/frame/field/NumericFieldWriterFactory.java @@ -21,8 +21,13 @@ import org.apache.druid.segment.ColumnValueSelector; -// TODO(laksh): Javadoc +/** + * Factory for {@link NumericFieldWriter} + */ public interface NumericFieldWriterFactory { + /** + * Constructs an instance of {@link NumericFieldWriter} given the column selector + */ NumericFieldWriter get(ColumnValueSelector selector); } diff --git a/processing/src/main/java/org/apache/druid/frame/field/TransformUtils.java b/processing/src/main/java/org/apache/druid/frame/field/TransformUtils.java index 542ddadf6ab0..f882f0443568 100644 --- a/processing/src/main/java/org/apache/druid/frame/field/TransformUtils.java +++ b/processing/src/main/java/org/apache/druid/frame/field/TransformUtils.java @@ -19,6 +19,12 @@ package org.apache.druid.frame.field; +/** + * Utility methods to map the primitive numeric types into an equi-wide byte representation, such that the + * given byte sequence preserves the ordering of the original type when done byte comparison. + * Checkout {@link org.apache.druid.frame.read.FrameReaderUtils#compareMemoryToByteArrayUnsigned} for how this byte + * comparison is performed. + */ public class TransformUtils { /** diff --git a/processing/src/main/java/org/apache/druid/frame/read/FrameReader.java b/processing/src/main/java/org/apache/druid/frame/read/FrameReader.java index 1a497aeed4fa..af2958520389 100644 --- a/processing/src/main/java/org/apache/druid/frame/read/FrameReader.java +++ b/processing/src/main/java/org/apache/druid/frame/read/FrameReader.java @@ -61,7 +61,14 @@ public class FrameReader // Field readers, for row-based frames. private final List fieldReaders; - // TODO(laksh): Write comment why this is a hack for now + /** + * Currently, only ROW_BASED frames support numerical array columns, while the COLUMNAR frames donot. While creating + * a FrameReader, for types unsupported by COLUMNAR frames, we populate this field to denote that the FrameReader is + * "incomplete" and can't be used to read the columnar frame. However, the FrameReader performs as expected for the + * row-based frames. + * In short, this is a temporary measure till columnar frames support the numerical array types to punt the unsupported + * type check for the numerical arrays (for COLUMNAR frames only) at the usage time, rather than the creation time + */ private final Optional> unsupportedColumnAndType; @@ -110,13 +117,16 @@ public static FrameReader create(final RowSignature signature) fieldReaders.add(FieldReaders.create(signature.getColumnName(columnNumber), columnType)); - // TODO(laksh): comment + // If we encounter a numeric array type, then don't throw the error immediately since the reader can be used to + // read only the ROW_BASED frames. Rather, set the optional, and throw the appropriate error message when the reader + // tries to read COLUMNAR frame. This should go away once the COLUMNAR frames also start supporting the numeric + // array if (columnType.getType() == ValueType.ARRAY && Preconditions.checkNotNull( columnType.getElementType(), "Element type for array column [%s]", signature.getColumnName(columnNumber) - ).getType() != ValueType.STRING + ).getType().isNumeric() ) { if (!unsupportedColumnAndType.isPresent()) { unsupportedColumnAndType = Optional.of(Pair.of(signature.getColumnName(columnNumber), columnType)); diff --git a/processing/src/test/java/org/apache/druid/frame/field/DoubleArrayFieldReaderTest.java b/processing/src/test/java/org/apache/druid/frame/field/DoubleArrayFieldReaderTest.java index 7aaef2bd28af..768919bea0a3 100644 --- a/processing/src/test/java/org/apache/druid/frame/field/DoubleArrayFieldReaderTest.java +++ b/processing/src/test/java/org/apache/druid/frame/field/DoubleArrayFieldReaderTest.java @@ -146,7 +146,6 @@ public void test_makeColumnValueSelector_null() Assert.assertTrue(readSelector.isNull()); } - // TODO(laksh): Add a comment about why null doesn't change to default value @Test public void test_makeColumnValueSelector_aValue() { diff --git a/processing/src/test/java/org/apache/druid/frame/field/FloatArrayFieldReaderTest.java b/processing/src/test/java/org/apache/druid/frame/field/FloatArrayFieldReaderTest.java index c72df2a7c750..abda158c2a89 100644 --- a/processing/src/test/java/org/apache/druid/frame/field/FloatArrayFieldReaderTest.java +++ b/processing/src/test/java/org/apache/druid/frame/field/FloatArrayFieldReaderTest.java @@ -147,7 +147,6 @@ public void test_makeColumnValueSelector_null() Assert.assertTrue(readSelector.isNull()); } - // TODO(laksh): Add a comment about why null doesn't change to default value @Test public void test_makeColumnValueSelector_aValue() { diff --git a/processing/src/test/java/org/apache/druid/frame/field/LongArrayFieldReaderTest.java b/processing/src/test/java/org/apache/druid/frame/field/LongArrayFieldReaderTest.java index 12cd77e97d01..fd9a41e6df81 100644 --- a/processing/src/test/java/org/apache/druid/frame/field/LongArrayFieldReaderTest.java +++ b/processing/src/test/java/org/apache/druid/frame/field/LongArrayFieldReaderTest.java @@ -132,7 +132,6 @@ public void test_makeColumnValueSelector_null() Assert.assertTrue(readSelector.isNull()); } - // TODO(laksh): Add a comment about why null doesn't change to default value @Test public void test_makeColumnValueSelector_aValue() { diff --git a/processing/src/test/java/org/apache/druid/frame/field/TransformUtilsTest.java b/processing/src/test/java/org/apache/druid/frame/field/TransformUtilsTest.java index 61f78e3e5014..276e598ad367 100644 --- a/processing/src/test/java/org/apache/druid/frame/field/TransformUtilsTest.java +++ b/processing/src/test/java/org/apache/druid/frame/field/TransformUtilsTest.java @@ -26,7 +26,6 @@ import java.util.List; -// TODO(laksh): Javadoc for this method and its implementation class public class TransformUtilsTest { diff --git a/processing/src/test/java/org/apache/druid/frame/write/FrameWriterTest.java b/processing/src/test/java/org/apache/druid/frame/write/FrameWriterTest.java index 52d05287edcd..31b24825f959 100644 --- a/processing/src/test/java/org/apache/druid/frame/write/FrameWriterTest.java +++ b/processing/src/test/java/org/apache/druid/frame/write/FrameWriterTest.java @@ -156,7 +156,8 @@ public void test_long() @Test public void test_arrayLong() { - // TODO(laksh): add explanation + // ARRAY can't be read or written for columnar frames, therefore skip the check if it encounters those + // parameters Assume.assumeFalse(inputFrameType == FrameType.COLUMNAR || outputFrameType == FrameType.COLUMNAR); testWithDataset(FrameWriterTestData.TEST_ARRAYS_LONG); } @@ -164,7 +165,8 @@ public void test_arrayLong() @Test public void test_arrayFloat() { - // TODO(laksh): add explanation + // ARRAY can't be read or written for columnar frames, therefore skip the check if it encounters those + // parameters Assume.assumeFalse(inputFrameType == FrameType.COLUMNAR || outputFrameType == FrameType.COLUMNAR); testWithDataset(FrameWriterTestData.TEST_ARRAYS_FLOAT); } @@ -172,7 +174,8 @@ public void test_arrayFloat() @Test public void test_arrayDouble() { - // TODO(laksh): add explanation + // ARRAY can't be read or written for columnar frames, therefore skip the check if it encounters those + // parameters Assume.assumeFalse(inputFrameType == FrameType.COLUMNAR || outputFrameType == FrameType.COLUMNAR); testWithDataset(FrameWriterTestData.TEST_ARRAYS_DOUBLE); } @@ -253,7 +256,8 @@ public void test_typePairs() if (dataset1.getType().isArray() && dataset1.getType().getElementType().isNumeric() || dataset2.getType().isArray() && dataset2.getType().getElementType().isNumeric()) { if (inputFrameType == FrameType.COLUMNAR || outputFrameType == FrameType.COLUMNAR) { - // TODO(laksh) + // Skip the check if any of the dataset is a numerical array and any of the input or the output frame type + // is COLUMNAR. continue; } }