Skip to content

Commit

Permalink
remove todos, add javadocs
Browse files Browse the repository at this point in the history
  • Loading branch information
LakshSingla committed Sep 7, 2023
1 parent 3249066 commit 36077e9
Show file tree
Hide file tree
Showing 14 changed files with 95 additions and 25 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ private DoubleFieldWriter(final BaseDoubleColumnValueSelector selector, final bo
}

@Override
public int getNumericSize()
public int getNumericSizeBytes()
{
return Double.BYTES;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ public void close()
}

@Override
public int getNumericSize()
public int getNumericSizeBytes()
{
return Float.BYTES;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ private LongFieldWriter(final BaseLongColumnValueSelector selector, final boolea
}

@Override
public int getNumericSize()
public int getNumericSizeBytes()
{
return Long.BYTES;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,7 @@ public DimensionSelector makeDimensionSelector(
@Nullable ExtractionFn extractionFn
)
{
// TODO(laksh): Should I throw an exception here
return DimensionSelector.nilSelector();
throw DruidException.defensive("Cannot call makeDimensionSelector on field of type ARRAY");
}

@Override
Expand Down Expand Up @@ -159,7 +158,7 @@ private void updateCurrentArray(final long fieldPosition)
return;
}

// TODO(laksh): add comment about position < limit check
// Adding a check here to prevent the position from potentially overflowing
if (position < limit) {
position++;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,12 @@ public void inspectRuntimeShape(RuntimeShapeInspector inspector)
@Override
public boolean isNull()
{
// TODO(laksh): Add a comment why NullHandling.replaceWithDefault() is not required here
// Arrays preserve the individual element's nullity when they are written and read.
// Therefore, when working with SQL incompatible mode, [7, null] won't change to [7, 0] when written to and
// read from the underlying serialization (as compared with the primitives). Therefore,
// even when NullHandling.replaceWithDefault() is true we need to write null as is, and not convert it to their
// default value when writing the array. Therefore, the check is `getObject() == null` ignoring the value of
// `NullHandling.replaceWithDefaul()`.
return getObject() == null;
}

Expand All @@ -141,7 +146,7 @@ public Class<? extends Number> classOfObject()

NumericFieldWriter writer = writerFactory.get(columnValueSelector);

int requiredSize = Byte.BYTES + (writer.getNumericSize() + Byte.BYTES) * list.size() + Byte.BYTES;
int requiredSize = Byte.BYTES + (writer.getNumericSizeBytes() + Byte.BYTES) * list.size() + Byte.BYTES;

if (requiredSize > maxSize) {
return -1;
Expand All @@ -157,7 +162,7 @@ public Class<? extends Number> classOfObject()
position + offset,
maxSize - offset
);
offset += Byte.BYTES + writer.getNumericSize();
offset += Byte.BYTES + writer.getNumericSizeBytes();
}

memory.putByte(position + offset, ARRAY_TERMINATOR);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,14 +22,46 @@
import org.apache.datasketches.memory.WritableMemory;
import org.apache.druid.segment.BaseNullableColumnValueSelector;

// TODO(laksh): Add comment here
/**
* FieldWriter for numeric datatypes. The parent class does the null handling for the underlying data, while
* the individual subclasses write the individual element (long, float or double type). This also allows for a clean
* reuse while creating {@link NumericArrayFieldWriter}
* <p>
* Indicator bytes for denoting whether the element is null or not null changes depending on whether the writer is used
* to write the data for individual value (like LONG) or for an element of an array (like ARRAY<LONG>). This is because
* array support for the numeric types was added later and by then the field writers for individual fields were using
* 0x00 to denote the null byte, which is reserved for denoting the array end when we are writing the elements as part
* of the array instead. (0x00 is used for array end because it helps in preserving the byte comparison property of the
* numeric array field writers).
* <p>
* Therefore, to preserve backward and forward compatibility, the individual element's writers were left unchanged,
* while the array's element's writers used 0x01 and 0x02 to denote null and non-null byte respectively
*/
public abstract class NumericFieldWriter implements FieldWriter
{
/**
* Indicator byte denoting that the numeric value succeeding it is null. This is used in the primitive
* writers. NULL_BYTE < NOT_NULL_BYTE to preserve the ordering while doing byte comparison
*/
public static final byte NULL_BYTE = 0x00;

/**
* Indicator byte denoting that the numeric value succeeding it is not null. This is used in the primitive
* writers
*/
public static final byte NOT_NULL_BYTE = 0x01;

// TODO(laksh): Add comment here
/**
* Indicator byte denoting that the numeric value succeeding it is null. This is used while writing the individual
* elements writers of an array. ARRAY_NULL_BYTE < ARRAY_NOT_NULL_BYTE to preserve the ordering while doing byte
* comparison
*/
public static final byte ARRAY_NULL_BYTE = 0x01;

/**
* Indicator byte denoting that the numeric value succeeding it is not null. This is used while writing the individual
* elements writers of an array
*/
public static final byte ARRAY_NOT_NULL_BYTE = 0x02;

private final BaseNullableColumnValueSelector selector;
Expand All @@ -54,12 +86,13 @@ public NumericFieldWriter(
@Override
public long writeTo(WritableMemory memory, long position, long maxSize)
{
int size = getNumericSize() + Byte.BYTES;
int size = getNumericSizeBytes() + Byte.BYTES;

if (maxSize < size) {
return -1;
}

// Using isNull() since this is a primitive type
if (selector.isNull()) {
memory.putByte(position, nullIndicatorByte);
writeNullToMemory(memory, position + Byte.BYTES);
Expand All @@ -77,9 +110,21 @@ public void close()
// Nothing to do
}

public abstract int getNumericSize();
/**
* @return The size in bytes of the numeric datatype that the implementation of this writer occupies
*/
public abstract int getNumericSizeBytes();

/**
* Writes the value pointed by the selector to memory. The caller should ensure that the selector gives out the
* correct primitive type
*/
public abstract void writeSelectorToMemory(WritableMemory memory, long position);

/**
* Writes the default value for the type to the memory. For long, it is 0L, for double, it is 0.0d etc. Useful mainly
* when the SQL incompatible mode is turned off, and maintains the fact that the size of the numeric field written
* doesn't vary irrespective of whether the value is null
*/
public abstract void writeNullToMemory(WritableMemory memory, long position);
}
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,13 @@

import org.apache.druid.segment.ColumnValueSelector;

// TODO(laksh): Javadoc
/**
* Factory for {@link NumericFieldWriter}
*/
public interface NumericFieldWriterFactory
{
/**
* Constructs an instance of {@link NumericFieldWriter} given the column selector
*/
NumericFieldWriter get(ColumnValueSelector<Number> selector);
}
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,12 @@

package org.apache.druid.frame.field;

/**
* Utility methods to map the primitive numeric types into an equi-wide byte representation, such that the
* given byte sequence preserves the ordering of the original type when done byte comparison.
* Checkout {@link org.apache.druid.frame.read.FrameReaderUtils#compareMemoryToByteArrayUnsigned} for how this byte
* comparison is performed.
*/
public class TransformUtils
{
/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,14 @@ public class FrameReader
// Field readers, for row-based frames.
private final List<FieldReader> fieldReaders;

// TODO(laksh): Write comment why this is a hack for now
/**
* Currently, only ROW_BASED frames support numerical array columns, while the COLUMNAR frames donot. While creating
* a FrameReader, for types unsupported by COLUMNAR frames, we populate this field to denote that the FrameReader is
* "incomplete" and can't be used to read the columnar frame. However, the FrameReader performs as expected for the
* row-based frames.
* In short, this is a temporary measure till columnar frames support the numerical array types to punt the unsupported
* type check for the numerical arrays (for COLUMNAR frames only) at the usage time, rather than the creation time
*/
private final Optional<Pair<String, ColumnType>> unsupportedColumnAndType;


Expand Down Expand Up @@ -110,13 +117,16 @@ public static FrameReader create(final RowSignature signature)

fieldReaders.add(FieldReaders.create(signature.getColumnName(columnNumber), columnType));

// TODO(laksh): comment
// If we encounter a numeric array type, then don't throw the error immediately since the reader can be used to
// read only the ROW_BASED frames. Rather, set the optional, and throw the appropriate error message when the reader
// tries to read COLUMNAR frame. This should go away once the COLUMNAR frames also start supporting the numeric
// array
if (columnType.getType() == ValueType.ARRAY
&& Preconditions.checkNotNull(
columnType.getElementType(),
"Element type for array column [%s]",
signature.getColumnName(columnNumber)
).getType() != ValueType.STRING
).getType().isNumeric()
) {
if (!unsupportedColumnAndType.isPresent()) {
unsupportedColumnAndType = Optional.of(Pair.of(signature.getColumnName(columnNumber), columnType));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,6 @@ public void test_makeColumnValueSelector_null()
Assert.assertTrue(readSelector.isNull());
}

// TODO(laksh): Add a comment about why null doesn't change to default value
@Test
public void test_makeColumnValueSelector_aValue()
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,6 @@ public void test_makeColumnValueSelector_null()
Assert.assertTrue(readSelector.isNull());
}

// TODO(laksh): Add a comment about why null doesn't change to default value
@Test
public void test_makeColumnValueSelector_aValue()
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,6 @@ public void test_makeColumnValueSelector_null()
Assert.assertTrue(readSelector.isNull());
}

// TODO(laksh): Add a comment about why null doesn't change to default value
@Test
public void test_makeColumnValueSelector_aValue()
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@

import java.util.List;

// TODO(laksh): Javadoc for this method and its implementation class
public class TransformUtilsTest
{

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -156,23 +156,26 @@ public void test_long()
@Test
public void test_arrayLong()
{
// TODO(laksh): add explanation
// ARRAY<LONG> can't be read or written for columnar frames, therefore skip the check if it encounters those
// parameters
Assume.assumeFalse(inputFrameType == FrameType.COLUMNAR || outputFrameType == FrameType.COLUMNAR);
testWithDataset(FrameWriterTestData.TEST_ARRAYS_LONG);
}

@Test
public void test_arrayFloat()
{
// TODO(laksh): add explanation
// ARRAY<FLOAT> can't be read or written for columnar frames, therefore skip the check if it encounters those
// parameters
Assume.assumeFalse(inputFrameType == FrameType.COLUMNAR || outputFrameType == FrameType.COLUMNAR);
testWithDataset(FrameWriterTestData.TEST_ARRAYS_FLOAT);
}

@Test
public void test_arrayDouble()
{
// TODO(laksh): add explanation
// ARRAY<DOUBLE> can't be read or written for columnar frames, therefore skip the check if it encounters those
// parameters
Assume.assumeFalse(inputFrameType == FrameType.COLUMNAR || outputFrameType == FrameType.COLUMNAR);
testWithDataset(FrameWriterTestData.TEST_ARRAYS_DOUBLE);
}
Expand Down Expand Up @@ -253,7 +256,8 @@ public void test_typePairs()
if (dataset1.getType().isArray() && dataset1.getType().getElementType().isNumeric()
|| dataset2.getType().isArray() && dataset2.getType().getElementType().isNumeric()) {
if (inputFrameType == FrameType.COLUMNAR || outputFrameType == FrameType.COLUMNAR) {
// TODO(laksh)
// Skip the check if any of the dataset is a numerical array and any of the input or the output frame type
// is COLUMNAR.
continue;
}
}
Expand Down

0 comments on commit 36077e9

Please sign in to comment.