diff --git a/processing/src/main/java/org/apache/druid/frame/field/DoubleArrayFieldReader.java b/processing/src/main/java/org/apache/druid/frame/field/DoubleArrayFieldReader.java index 06c0c8ff968d..b8a3d825aba7 100644 --- a/processing/src/main/java/org/apache/druid/frame/field/DoubleArrayFieldReader.java +++ b/processing/src/main/java/org/apache/druid/frame/field/DoubleArrayFieldReader.java @@ -24,6 +24,9 @@ import javax.annotation.Nullable; +/** + * Reader for fields written by {@link NumericArrayFieldWriter#getDoubleArrayFieldWriter} + */ public class DoubleArrayFieldReader extends NumericArrayFieldReader { @Override diff --git a/processing/src/main/java/org/apache/druid/frame/field/DoubleFieldReader.java b/processing/src/main/java/org/apache/druid/frame/field/DoubleFieldReader.java index 473ef9066c1d..805d71774e1e 100644 --- a/processing/src/main/java/org/apache/druid/frame/field/DoubleFieldReader.java +++ b/processing/src/main/java/org/apache/druid/frame/field/DoubleFieldReader.java @@ -27,6 +27,7 @@ import org.apache.druid.segment.column.ValueType; /** + * Reads the values produced by {@link DoubleFieldWriter} */ public class DoubleFieldReader extends NumericFieldReader { diff --git a/processing/src/main/java/org/apache/druid/frame/field/DoubleFieldWriter.java b/processing/src/main/java/org/apache/druid/frame/field/DoubleFieldWriter.java index 0fb08d3098ce..616e7aa26465 100644 --- a/processing/src/main/java/org/apache/druid/frame/field/DoubleFieldWriter.java +++ b/processing/src/main/java/org/apache/druid/frame/field/DoubleFieldWriter.java @@ -24,8 +24,6 @@ /** * Wraps a {@link BaseDoubleColumnValueSelector} and writes field values. - *

- * See {@link DoubleFieldReader} for format details. */ public class DoubleFieldWriter extends NumericFieldWriter { diff --git a/processing/src/main/java/org/apache/druid/frame/field/FloatArrayFieldReader.java b/processing/src/main/java/org/apache/druid/frame/field/FloatArrayFieldReader.java index bcefc5935246..01905e420bb5 100644 --- a/processing/src/main/java/org/apache/druid/frame/field/FloatArrayFieldReader.java +++ b/processing/src/main/java/org/apache/druid/frame/field/FloatArrayFieldReader.java @@ -24,6 +24,9 @@ import javax.annotation.Nullable; +/** + * Reader for fields written by {@link NumericArrayFieldWriter#getFloatArrayFieldWriter} + */ public class FloatArrayFieldReader extends NumericArrayFieldReader { @Override diff --git a/processing/src/main/java/org/apache/druid/frame/field/FloatFieldReader.java b/processing/src/main/java/org/apache/druid/frame/field/FloatFieldReader.java index 0dc54ad2a5ec..0897a36bae8f 100644 --- a/processing/src/main/java/org/apache/druid/frame/field/FloatFieldReader.java +++ b/processing/src/main/java/org/apache/druid/frame/field/FloatFieldReader.java @@ -28,13 +28,6 @@ /** * Reads values written by {@link FloatFieldWriter}. - * - * Values are sortable as bytes without decoding. - * - * Format: - * - * - 1 byte: {@link FloatFieldWriter#NULL_BYTE} or {@link FloatFieldWriter#NOT_NULL_BYTE} - * - 4 bytes: encoded float, using {@link TransformUtils#transformFromFloat(float)} */ public class FloatFieldReader extends NumericFieldReader { diff --git a/processing/src/main/java/org/apache/druid/frame/field/FloatFieldWriter.java b/processing/src/main/java/org/apache/druid/frame/field/FloatFieldWriter.java index 6f11ebc0ab16..474061d26fc3 100644 --- a/processing/src/main/java/org/apache/druid/frame/field/FloatFieldWriter.java +++ b/processing/src/main/java/org/apache/druid/frame/field/FloatFieldWriter.java @@ -24,8 +24,6 @@ /** * Wraps a {@link BaseFloatColumnValueSelector} and writes field values. - *

- * See {@link FloatFieldReader} for format details. */ public class FloatFieldWriter extends NumericFieldWriter { diff --git a/processing/src/main/java/org/apache/druid/frame/field/LongArrayFieldReader.java b/processing/src/main/java/org/apache/druid/frame/field/LongArrayFieldReader.java index 56542a3fd0b8..6c1c5581ffae 100644 --- a/processing/src/main/java/org/apache/druid/frame/field/LongArrayFieldReader.java +++ b/processing/src/main/java/org/apache/druid/frame/field/LongArrayFieldReader.java @@ -24,6 +24,9 @@ import javax.annotation.Nullable; +/** + * Reader for fields written by {@link NumericArrayFieldWriter#getLongArrayFieldWriter} + */ public class LongArrayFieldReader extends NumericArrayFieldReader { @Override diff --git a/processing/src/main/java/org/apache/druid/frame/field/LongFieldReader.java b/processing/src/main/java/org/apache/druid/frame/field/LongFieldReader.java index 7885bcc2e5d9..ce8063a28cad 100644 --- a/processing/src/main/java/org/apache/druid/frame/field/LongFieldReader.java +++ b/processing/src/main/java/org/apache/druid/frame/field/LongFieldReader.java @@ -28,13 +28,6 @@ /** * Reads values written by {@link LongFieldWriter}. - *

- * Values are sortable as bytes without decoding. - *

- * Format: - *

- * - 1 byte: {@link LongFieldWriter#NULL_BYTE} or {@link LongFieldWriter#NOT_NULL_BYTE} - * - 8 bytes: encoded long: big-endian order, with sign flipped */ public class LongFieldReader extends NumericFieldReader { diff --git a/processing/src/main/java/org/apache/druid/frame/field/LongFieldWriter.java b/processing/src/main/java/org/apache/druid/frame/field/LongFieldWriter.java index db484c12a33b..2d324ae9b586 100644 --- a/processing/src/main/java/org/apache/druid/frame/field/LongFieldWriter.java +++ b/processing/src/main/java/org/apache/druid/frame/field/LongFieldWriter.java @@ -24,8 +24,6 @@ /** * Wraps a {@link BaseLongColumnValueSelector} and writes individual values into frame rows. - *

- * See {@link LongFieldReader} for format details. */ public class LongFieldWriter extends NumericFieldWriter { diff --git a/processing/src/main/java/org/apache/druid/frame/field/NumericArrayFieldReader.java b/processing/src/main/java/org/apache/druid/frame/field/NumericArrayFieldReader.java index 9c6a2ab08b9d..cfc5584b63a3 100644 --- a/processing/src/main/java/org/apache/druid/frame/field/NumericArrayFieldReader.java +++ b/processing/src/main/java/org/apache/druid/frame/field/NumericArrayFieldReader.java @@ -30,6 +30,12 @@ import java.util.ArrayList; import java.util.List; +/** + * Reader class for the fields written by {@link NumericArrayFieldWriter}. See the Javadoc for the writer for more + * information on the format + * + * The numeric array fields are byte comparable + */ public abstract class NumericArrayFieldReader implements FieldReader { @Override diff --git a/processing/src/main/java/org/apache/druid/frame/field/NumericArrayFieldWriter.java b/processing/src/main/java/org/apache/druid/frame/field/NumericArrayFieldWriter.java index af81b20b0b3f..1c9baf56c57f 100644 --- a/processing/src/main/java/org/apache/druid/frame/field/NumericArrayFieldWriter.java +++ b/processing/src/main/java/org/apache/druid/frame/field/NumericArrayFieldWriter.java @@ -29,28 +29,91 @@ import java.util.List; import java.util.concurrent.atomic.AtomicInteger; +/** + * Writes the values of the type {@code ARRAY<X>}, where X is a numeric type, to row based frames. + * The format of the array written is as follows: + *

+ * Format: + * - 1 Byte - {@link #NULL_ROW} or {@link #NON_NULL_ROW} denoting whether the array itself is null + * - If the array is null, then the writer stops here + * - If the array is not null, then it proceeds to the following steps + *

+ * For each value in the non-null array: + * - 1 Byte - {@link NumericFieldWriter#ARRAY_ELEMENT_NULL_BYTE} or {@link NumericFieldWriter#ARRAY_ELEMENT_NOT_NULL_BYTE} + * denoting whether the following value is null or not. + * - ElementSize Bytes - The encoded value of the element + *

+ * Once all the values in the non-null array have been written, writes {@link #ARRAY_TERMINATOR}. This is to aid the byte + * comparison, and also lets the reader know that there are no more elements in the array. + *

+ * The format doesn't add the number of elements in the array at the beginning, though that would have been more + * convenient, in order to keep the written array value byte comparable + *

+ * Examples: + * 1. null + * | Bytes | Value | Interpretation | + * |--------|-------|--------------------------------| + * | 1 | 0x00 | Denotes that the array is null | + *

+ * 2. [] (empty array) + * | Bytes | Value | Interpretation | + * |--------|-------|------------------------------------| + * | 1 | 0x01 | Denotes that the array is not null | + * | 2 | 0x00 | End of the array | + *

+ * 3. [5L, null, 6L] + * | Bytes | Value | Interpretation | + * |---------|--------------|-----------------------------------------------------------------------------------| + * | 1 | 0x01 | Denotes that the array is not null | + * | 2 | 0x02 | Denotes that the next element is not null | + * | 3-10 | transform(5) | Representation of 5 | + * | 11 | 0x01 | Denotes that the next element is null | + * | 12-19 | transform(0) | Representation of 0 (default value, the reader will ignore it if SqlCompatible mode is on) | + * | 20 | 0x02 | Denotes that the next element is not null | + * | 21-28 | transform(6) | Representation of 6 | + * | 29 | 0x00 | End of array | + */ public class NumericArrayFieldWriter implements FieldWriter { + /** + * Denotes that the array itself is null + */ public static final byte NULL_ROW = 0x00; + + /** + * Denotes that the array is non null + */ public static final byte NON_NULL_ROW = 0x01; - // Different from NULL_ROW and NON_NULL_ROW bytes + /** + * Marks the end of the array. 
Since {@link #NULL_ROW} and {@link #ARRAY_TERMINATOR} will only occur at different + * locations, there is no clash in both of them having the value 0x00 + */ public static final byte ARRAY_TERMINATOR = 0x00; private final ColumnValueSelector selector; private final NumericFieldWriterFactory writerFactory; + /** + * Returns the writer for {@code ARRAY<LONG>} + */ public static NumericArrayFieldWriter getLongArrayFieldWriter(final ColumnValueSelector selector) { return new NumericArrayFieldWriter(selector, LongFieldWriter::forArray); } + /** + * Returns the writer for {@code ARRAY<FLOAT>} + */ public static NumericArrayFieldWriter getFloatArrayFieldWriter(final ColumnValueSelector selector) { return new NumericArrayFieldWriter(selector, FloatFieldWriter::forArray); } + /** + * Returns the writer for {@code ARRAY<DOUBLE>} + */ public static NumericArrayFieldWriter getDoubleArrayFieldWriter(final ColumnValueSelector selector) { return new NumericArrayFieldWriter(selector, DoubleFieldWriter::forArray); @@ -74,6 +137,7 @@ public long writeTo(WritableMemory memory, long position, long maxSize) memory.putByte(position, NULL_ROW); return requiredSize; } else { + List list = FrameWriterUtils.getNumericArrayFromNumericArray(row); if (list == null) { @@ -85,6 +149,7 @@ public long writeTo(WritableMemory memory, long position, long maxSize) return requiredSize; } + // Create a columnValueSelector to write the individual elements re-using the NumericFieldWriter AtomicInteger index = new AtomicInteger(0); ColumnValueSelector columnValueSelector = new ColumnValueSelector() { diff --git a/processing/src/main/java/org/apache/druid/frame/field/NumericFieldReader.java b/processing/src/main/java/org/apache/druid/frame/field/NumericFieldReader.java index 4619f2a36f9b..e56b7de6228f 100644 --- a/processing/src/main/java/org/apache/druid/frame/field/NumericFieldReader.java +++ b/processing/src/main/java/org/apache/druid/frame/field/NumericFieldReader.java @@ -28,9 +28,15 @@ import javax.annotation.Nullable; +/** + * Reads the 
fields created by the {@link NumericFieldWriter}. See the Javadoc for the writer for format details + */ public abstract class NumericFieldReader implements FieldReader { + /** + * The indicator byte which denotes that the following value is null. + */ private final byte nullIndicatorByte; public NumericFieldReader(boolean forArray) @@ -38,7 +44,7 @@ public NumericFieldReader(boolean forArray) if (!forArray) { this.nullIndicatorByte = NumericFieldWriter.NULL_BYTE; } else { - this.nullIndicatorByte = NumericFieldWriter.ARRAY_NULL_BYTE; + this.nullIndicatorByte = NumericFieldWriter.ARRAY_ELEMENT_NULL_BYTE; } } diff --git a/processing/src/main/java/org/apache/druid/frame/field/NumericFieldWriter.java b/processing/src/main/java/org/apache/druid/frame/field/NumericFieldWriter.java index 4dcdb096a369..266a62edab83 100644 --- a/processing/src/main/java/org/apache/druid/frame/field/NumericFieldWriter.java +++ b/processing/src/main/java/org/apache/druid/frame/field/NumericFieldWriter.java @@ -25,17 +25,24 @@ /** * FieldWriter for numeric datatypes. The parent class does the null handling for the underlying data, while * the individual subclasses write the individual element (long, float or double type). This also allows for a clean - * reuse while creating {@link NumericArrayFieldWriter} - *

+ * reuse of the readers and writers between the numeric types and also allowing the array writers ({@link NumericArrayFieldWriter}) + * to use these methods directly without duplication + * + * Format: + * - 1 byte: Whether the following value is null or not. Take a look at the note on the indicator bytes. + * - X bytes: Encoded value of the selector, or the default value if it is null. X denotes the size of the numeric value + * * Indicator bytes for denoting whether the element is null or not null changes depending on whether the writer is used * to write the data for individual value (like LONG) or for an element of an array (like ARRAY). This is because * array support for the numeric types was added later and by then the field writers for individual fields were using * 0x00 to denote the null byte, which is reserved for denoting the array end when we are writing the elements as part * of the array instead. (0x00 is used for array end because it helps in preserving the byte comparison property of the * numeric array field writers). - *

+ * * Therefore, to preserve backward and forward compatibility, the individual element's writers were left unchanged, * while the array's element's writers used 0x01 and 0x02 to denote null and non-null byte respectively + * + * Values produced by the writer are sortable without decoding */ public abstract class NumericFieldWriter implements FieldWriter { @@ -53,16 +60,16 @@ public abstract class NumericFieldWriter implements FieldWriter /** * Indicator byte denoting that the numeric value succeeding it is null. This is used while writing the individual - * elements writers of an array. ARRAY_NULL_BYTE < ARRAY_NOT_NULL_BYTE to preserve the ordering while doing byte - * comparison + * elements writers of an array. ARRAY_ELEMENT_NULL_BYTE < ARRAY_ELEMENT_NOT_NULL_BYTE to preserve the ordering + * while doing byte comparison */ - public static final byte ARRAY_NULL_BYTE = 0x01; + public static final byte ARRAY_ELEMENT_NULL_BYTE = 0x01; /** * Indicator byte denoting that the numeric value succeeding it is not null. 
This is used while writing the individual * elements writers of an array */ - public static final byte ARRAY_NOT_NULL_BYTE = 0x02; + public static final byte ARRAY_ELEMENT_NOT_NULL_BYTE = 0x02; private final BaseNullableColumnValueSelector selector; private final byte nullIndicatorByte; @@ -78,8 +85,8 @@ public NumericFieldWriter( this.nullIndicatorByte = NULL_BYTE; this.notNullIndicatorByte = NOT_NULL_BYTE; } else { - this.nullIndicatorByte = ARRAY_NULL_BYTE; - this.notNullIndicatorByte = ARRAY_NOT_NULL_BYTE; + this.nullIndicatorByte = ARRAY_ELEMENT_NULL_BYTE; + this.notNullIndicatorByte = ARRAY_ELEMENT_NOT_NULL_BYTE; } } diff --git a/processing/src/test/java/org/apache/druid/frame/field/IndexArrayFieldPointer.java b/processing/src/test/java/org/apache/druid/frame/field/IndexArrayFieldPointer.java index a386bfd705ca..1e115f48e3c5 100644 --- a/processing/src/test/java/org/apache/druid/frame/field/IndexArrayFieldPointer.java +++ b/processing/src/test/java/org/apache/druid/frame/field/IndexArrayFieldPointer.java @@ -23,6 +23,10 @@ import java.util.List; +/** + * Stores the memory locations in an array, and spits out the value pointed to by the memory location by pointer, + * which is settable by the user + */ public class IndexArrayFieldPointer implements ReadableFieldPointer { private final LongArrayList indices;