From 26f24923e29a9f3dbc209532831011b15df0c1dd Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Mon, 2 Dec 2024 14:38:53 -0500 Subject: [PATCH 1/3] Create `ArrayScalarBuilder` for creating single element List arrays --- datafusion/common/src/scalar/mod.rs | 76 ++++----- datafusion/common/src/utils/mod.rs | 159 ++++++++++++++---- .../src/aggregate/count_distinct/bytes.rs | 9 +- .../src/aggregate/count_distinct/native.rs | 8 +- .../functions-aggregate/src/array_agg.rs | 9 +- .../functions-aggregate/src/nth_value.rs | 6 +- datafusion/functions-nested/src/make_array.rs | 7 +- datafusion/functions-nested/src/utils.rs | 27 +-- 8 files changed, 192 insertions(+), 109 deletions(-) diff --git a/datafusion/common/src/scalar/mod.rs b/datafusion/common/src/scalar/mod.rs index edba0b84431f..9e3acb977e20 100644 --- a/datafusion/common/src/scalar/mod.rs +++ b/datafusion/common/src/scalar/mod.rs @@ -39,11 +39,7 @@ use crate::cast::{ }; use crate::error::{DataFusionError, Result, _exec_err, _internal_err, _not_impl_err}; use crate::hash_utils::create_hashes; -use crate::utils::{ - array_into_fixed_size_list_array_with_field_name, array_into_large_list_array, - array_into_large_list_array_with_field_name, array_into_list_array, - array_into_list_array_with_field_name, -}; +use crate::utils::SingleRowArrayBuilder; use arrow::compute::kernels::numeric::*; use arrow::util::display::{array_value_to_string, ArrayFormatter, FormatOptions}; use arrow::{ @@ -2092,7 +2088,11 @@ impl ScalarValue { } else { Self::iter_to_array(values.iter().cloned()).unwrap() }; - Arc::new(array_into_list_array(values, nullable)) + Arc::new( + SingleRowArrayBuilder::new(values) + .with_nullable(nullable) + .build_list_array(), + ) } /// Same as [`ScalarValue::new_list`] but with nullable set to true. @@ -2148,7 +2148,11 @@ impl ScalarValue { } else { Self::iter_to_array(values).unwrap() }; - Arc::new(array_into_list_array(values, nullable)) + Arc::new( + SingleRowArrayBuilder::new(values) + .with_nullable(nullable) + .build_list_array(), + ) } /// Converts `Vec` where each element has type corresponding to @@ -2185,7 +2189,7 @@ impl ScalarValue { } else { Self::iter_to_array(values.iter().cloned()).unwrap() }; - Arc::new(array_into_large_list_array(values)) + Arc::new(SingleRowArrayBuilder::new(values).build_large_list_array()) } /// Converts a scalar value into an array of `size` rows. @@ -2665,38 +2669,27 @@ impl ScalarValue { let list_array = array.as_list::(); let nested_array = list_array.value(index); // Produces a single element `ListArray` with the value at `index`. - let arr = Arc::new(array_into_list_array_with_field_name( - nested_array, - field.is_nullable(), - field.name(), - )); - - ScalarValue::List(arr) + SingleRowArrayBuilder::new(nested_array) + .with_field(field) + .build_list_scalar() } DataType::LargeList(field) => { let list_array = as_large_list_array(array); let nested_array = list_array.value(index); // Produces a single element `LargeListArray` with the value at `index`. - let arr = Arc::new(array_into_large_list_array_with_field_name( - nested_array, - field.name(), - )); - - ScalarValue::LargeList(arr) + SingleRowArrayBuilder::new(nested_array) + .with_field(field) + .build_large_list_scalar() } // TODO: There is no test for FixedSizeList now, add it later DataType::FixedSizeList(field, _) => { let list_array = as_fixed_size_list_array(array)?; let nested_array = list_array.value(index); - // Produces a single element `ListArray` with the value at `index`. + // Produces a single element `FixedSizeListArray` with the value at `index`. let list_size = nested_array.len(); - let arr = Arc::new(array_into_fixed_size_list_array_with_field_name( - nested_array, - list_size, - field.name(), - )); - - ScalarValue::FixedSizeList(arr) + SingleRowArrayBuilder::new(nested_array) + .with_field(field) + .build_fixed_size_list_scalar(list_size) } DataType::Date32 => typed_cast!(array, index, Date32Array, Date32)?, DataType::Date64 => typed_cast!(array, index, Date64Array, Date64)?, @@ -3895,7 +3888,6 @@ mod tests { }; use crate::assert_batches_eq; - use crate::utils::array_into_list_array_nullable; use arrow::buffer::OffsetBuffer; use arrow::compute::{is_null, kernels}; use arrow::error::ArrowError; @@ -4071,14 +4063,14 @@ mod tests { let result = ScalarValue::new_list_nullable(scalars.as_slice(), &DataType::Utf8); - let expected = array_into_list_array_nullable(Arc::new(StringArray::from(vec![ - "rust", - "arrow", - "data-fusion", - ]))); + let expected = single_row_list_array(vec!["rust", "arrow", "data-fusion"]); assert_eq!(*result, expected); } + fn single_row_list_array(items: Vec<&str>) -> ListArray { + SingleRowArrayBuilder::new(Arc::new(StringArray::from(items))).build_list_array() + } + fn build_list( values: Vec>>>, ) -> Vec { @@ -4283,12 +4275,8 @@ mod tests { #[test] fn iter_to_array_string_test() { - let arr1 = array_into_list_array_nullable(Arc::new(StringArray::from(vec![ - "foo", "bar", "baz", - ]))); - let arr2 = array_into_list_array_nullable(Arc::new(StringArray::from(vec![ - "rust", "world", - ]))); + let arr1 = single_row_list_array(vec!["foo", "bar", "baz"]); + let arr2 = single_row_list_array(vec!["rust", "world"]); let scalars = vec![ ScalarValue::List(Arc::new(arr1)), @@ -5745,13 +5733,13 @@ mod tests { // Define list-of-structs scalars let nl0_array = ScalarValue::iter_to_array(vec![s0, s1.clone()]).unwrap(); - let nl0 = ScalarValue::List(Arc::new(array_into_list_array_nullable(nl0_array))); + let nl0 = SingleRowArrayBuilder::new(nl0_array).build_list_scalar(); let nl1_array = ScalarValue::iter_to_array(vec![s2]).unwrap(); - let nl1 = ScalarValue::List(Arc::new(array_into_list_array_nullable(nl1_array))); + let nl1 = SingleRowArrayBuilder::new(nl1_array).build_list_scalar(); let nl2_array = ScalarValue::iter_to_array(vec![s1]).unwrap(); - let nl2 = ScalarValue::List(Arc::new(array_into_list_array_nullable(nl2_array))); + let nl2 = SingleRowArrayBuilder::new(nl2_array).build_list_scalar(); // iter_to_array for list-of-struct let array = ScalarValue::iter_to_array(vec![nl0, nl1, nl2]).unwrap(); diff --git a/datafusion/common/src/utils/mod.rs b/datafusion/common/src/utils/mod.rs index f3bba8e254d9..e70cc8aa0b96 100644 --- a/datafusion/common/src/utils/mod.rs +++ b/datafusion/common/src/utils/mod.rs @@ -321,64 +321,164 @@ pub fn longest_consecutive_prefix>( count } +/// Creates single element [`ListArray`], [`LargeListArray`] and +/// [`FixedListArray`] from other arrays +/// +/// For example this builder can convert `[1, 2, 3]` into `[[1, 2, 3]]` +/// +/// # Example +/// ``` +/// # use std::sync::Arc; +/// # use arrow_array::{Array, ListArray}; +/// # use arrow_array::types::Int64Type; +/// # use datafusion_common::utils::SingleRowArrayBuilder; +/// // Array is [1, 2, 3] +/// let arr = ListArray::from_iter_primitive::(vec![ +/// Some(vec![Some(1), Some(2), Some(3)]), +/// ]); +/// // Wrap as a list array: [[1, 2, 3]] +/// let list_arr = SingleRowArrayBuilder::new(Arc::new(arr)).build_list_array(); +/// assert_eq!(list_arr.len(), 1); +/// ``` +#[derive(Debug, Clone)] +pub struct SingleRowArrayBuilder { + /// array to be wrapped + arr: ArrayRef, + /// Should the resulting array be nullable? Defaults to `true`. + nullable: bool, + /// Specify the field name for the resulting array. Defaults to value used in + /// [`Field::new_list_field`] + field_name: Option, +} + +impl SingleRowArrayBuilder { + /// Create a new instance of `SingleRowArrayBuilder` + pub fn new(arr: ArrayRef) -> Self { + Self { + arr, + nullable: true, + field_name: None, + } + } + + /// Set the nullable flag + pub fn with_nullable(mut self, nullable: bool) -> Self { + self.nullable = nullable; + self + } + + /// sets the field name for the resulting array + pub fn with_field_name(mut self, field_name: Option) -> Self { + self.field_name = field_name; + self + } + + /// Copies field name and nullable from the specified field + pub fn with_field(self, field: &Field) -> Self { + self.with_field_name(Some(field.name().to_owned())) + .with_nullable(field.is_nullable()) + } + + /// Build a single element [`ListArray`] + pub fn build_list_array(self) -> ListArray { + let (field, arr) = self.into_field_and_arr(); + let offsets = OffsetBuffer::from_lengths([arr.len()]); + ListArray::new(field, offsets, arr, None) + } + + /// Build a single element [`ListArray`] and wrap as [`ScalarValue::List`] + pub fn build_list_scalar(self) -> ScalarValue { + ScalarValue::List(Arc::new(self.build_list_array())) + } + + /// Build a single element [`LargeListArray`] + pub fn build_large_list_array(self) -> LargeListArray { + let (field, arr) = self.into_field_and_arr(); + let offsets = OffsetBuffer::from_lengths([arr.len()]); + LargeListArray::new(field, offsets, arr, None) + } + + /// Build a single element [`LargeListArray`] and wrap as [`ScalarValue::LargeList`] + pub fn build_large_list_scalar(self) -> ScalarValue { + ScalarValue::LargeList(Arc::new(self.build_large_list_array())) + } + + /// Build a single element [`FixedSizeListArray`] + pub fn build_fixed_size_list_array(self, list_size: usize) -> FixedSizeListArray { + let (field, arr) = self.into_field_and_arr(); + FixedSizeListArray::new(field, list_size as i32, arr, None) + } + + /// Build a single element [`FixedSizeListArray`] and wrap as [`ScalarValue::FixedSizeList`] + pub fn build_fixed_size_list_scalar(self, list_size: usize) -> ScalarValue { + ScalarValue::FixedSizeList(Arc::new(self.build_fixed_size_list_array(list_size))) + } + + /// Helper function: convert this builder into a tuple of field and array + fn into_field_and_arr(self) -> (Arc, ArrayRef) { + let Self { + arr, + nullable, + field_name, + } = self; + let data_type = arr.data_type().to_owned(); + let field = match field_name { + Some(name) => Field::new(name, data_type, nullable), + None => Field::new_list_field(data_type, nullable), + }; + (Arc::new(field), arr) + } +} + /// Wrap an array into a single element `ListArray`. /// For example `[1, 2, 3]` would be converted into `[[1, 2, 3]]` /// The field in the list array is nullable. +#[deprecated(since = "44.0.0", note = "please use `SingleRowArrayBuilder` instead")] pub fn array_into_list_array_nullable(arr: ArrayRef) -> ListArray { - array_into_list_array(arr, true) + SingleRowArrayBuilder::new(arr) + .with_nullable(true) + .build_list_array() } /// Wrap an array into a single element `ListArray`. /// For example `[1, 2, 3]` would be converted into `[[1, 2, 3]]` +#[deprecated(since = "44.0.0", note = "please use `SingleRowArrayBuilder` instead")] pub fn array_into_list_array(arr: ArrayRef, nullable: bool) -> ListArray { - let offsets = OffsetBuffer::from_lengths([arr.len()]); - ListArray::new( - Arc::new(Field::new_list_field(arr.data_type().to_owned(), nullable)), - offsets, - arr, - None, - ) + SingleRowArrayBuilder::new(arr) + .with_nullable(nullable) + .build_list_array() } +#[deprecated(since = "44.0.0", note = "please use `SingleRowArrayBuilder` instead")] pub fn array_into_list_array_with_field_name( arr: ArrayRef, nullable: bool, field_name: &str, ) -> ListArray { - let offsets = OffsetBuffer::from_lengths([arr.len()]); - ListArray::new( - Arc::new(Field::new(field_name, arr.data_type().to_owned(), nullable)), - offsets, - arr, - None, - ) + SingleRowArrayBuilder::new(arr) + .with_nullable(nullable) + .with_field_name(Some(field_name.to_string())) + .build_list_array() } /// Wrap an array into a single element `LargeListArray`. /// For example `[1, 2, 3]` would be converted into `[[1, 2, 3]]` +#[deprecated(since = "44.0.0", note = "please use `SingleRowArrayBuilder` instead")] pub fn array_into_large_list_array(arr: ArrayRef) -> LargeListArray { - let offsets = OffsetBuffer::from_lengths([arr.len()]); - LargeListArray::new( - Arc::new(Field::new_list_field(arr.data_type().to_owned(), true)), - offsets, - arr, - None, - ) + SingleRowArrayBuilder::new(arr).build_large_list_array() } +#[deprecated(since = "44.0.0", note = "please use `SingleRowArrayBuilder` instead")] pub fn array_into_large_list_array_with_field_name( arr: ArrayRef, field_name: &str, ) -> LargeListArray { - let offsets = OffsetBuffer::from_lengths([arr.len()]); - LargeListArray::new( - Arc::new(Field::new(field_name, arr.data_type().to_owned(), true)), - offsets, - arr, - None, - ) + SingleRowArrayBuilder::new(arr) + .with_field_name(Some(field_name.to_string())) + .build_large_list_array() } +#[deprecated(since = "44.0.0", note = "please use `SingleRowArrayBuilder` instead")] pub fn array_into_fixed_size_list_array( arr: ArrayRef, list_size: usize, @@ -392,6 +492,7 @@ pub fn array_into_fixed_size_list_array( ) } +#[deprecated(since = "44.0.0", note = "please use `SingleRowArrayBuilder` instead")] pub fn array_into_fixed_size_list_array_with_field_name( arr: ArrayRef, list_size: usize, diff --git a/datafusion/functions-aggregate-common/src/aggregate/count_distinct/bytes.rs b/datafusion/functions-aggregate-common/src/aggregate/count_distinct/bytes.rs index 07fa4efc990e..b8b2db08e591 100644 --- a/datafusion/functions-aggregate-common/src/aggregate/count_distinct/bytes.rs +++ b/datafusion/functions-aggregate-common/src/aggregate/count_distinct/bytes.rs @@ -19,14 +19,13 @@ use arrow::array::{ArrayRef, OffsetSizeTrait}; use datafusion_common::cast::as_list_array; -use datafusion_common::utils::array_into_list_array_nullable; +use datafusion_common::utils::SingleRowArrayBuilder; use datafusion_common::ScalarValue; use datafusion_expr_common::accumulator::Accumulator; use datafusion_physical_expr_common::binary_map::{ArrowBytesSet, OutputType}; use datafusion_physical_expr_common::binary_view_map::ArrowBytesViewSet; use std::fmt::Debug; use std::mem::size_of_val; -use std::sync::Arc; /// Specialized implementation of /// `COUNT DISTINCT` for [`StringArray`] [`LargeStringArray`], @@ -49,8 +48,7 @@ impl Accumulator for BytesDistinctCountAccumulator { fn state(&mut self) -> datafusion_common::Result> { let set = self.0.take(); let arr = set.into_state(); - let list = Arc::new(array_into_list_array_nullable(arr)); - Ok(vec![ScalarValue::List(list)]) + Ok(vec![SingleRowArrayBuilder::new(arr).build_list_scalar()]) } fn update_batch(&mut self, values: &[ArrayRef]) -> datafusion_common::Result<()> { @@ -109,8 +107,7 @@ impl Accumulator for BytesViewDistinctCountAccumulator { fn state(&mut self) -> datafusion_common::Result> { let set = self.0.take(); let arr = set.into_state(); - let list = Arc::new(array_into_list_array_nullable(arr)); - Ok(vec![ScalarValue::List(list)]) + Ok(vec![SingleRowArrayBuilder::new(arr).build_list_scalar()]) } fn update_batch(&mut self, values: &[ArrayRef]) -> datafusion_common::Result<()> { diff --git a/datafusion/functions-aggregate-common/src/aggregate/count_distinct/native.rs b/datafusion/functions-aggregate-common/src/aggregate/count_distinct/native.rs index 405b2c2db7bd..c8849168dcd2 100644 --- a/datafusion/functions-aggregate-common/src/aggregate/count_distinct/native.rs +++ b/datafusion/functions-aggregate-common/src/aggregate/count_distinct/native.rs @@ -33,8 +33,8 @@ use arrow::array::PrimitiveArray; use arrow::datatypes::DataType; use datafusion_common::cast::{as_list_array, as_primitive_array}; -use datafusion_common::utils::array_into_list_array_nullable; use datafusion_common::utils::memory::estimate_memory_size; +use datafusion_common::utils::SingleRowArrayBuilder; use datafusion_common::ScalarValue; use datafusion_expr_common::accumulator::Accumulator; @@ -73,8 +73,7 @@ where PrimitiveArray::::from_iter_values(self.values.iter().cloned()) .with_data_type(self.data_type.clone()), ); - let list = Arc::new(array_into_list_array_nullable(arr)); - Ok(vec![ScalarValue::List(list)]) + Ok(vec![SingleRowArrayBuilder::new(arr).build_list_scalar()]) } fn update_batch(&mut self, values: &[ArrayRef]) -> datafusion_common::Result<()> { @@ -160,8 +159,7 @@ where let arr = Arc::new(PrimitiveArray::::from_iter_values( self.values.iter().map(|v| v.0), )) as ArrayRef; - let list = Arc::new(array_into_list_array_nullable(arr)); - Ok(vec![ScalarValue::List(list)]) + Ok(vec![SingleRowArrayBuilder::new(arr).build_list_scalar()]) } fn update_batch(&mut self, values: &[ArrayRef]) -> datafusion_common::Result<()> { diff --git a/datafusion/functions-aggregate/src/array_agg.rs b/datafusion/functions-aggregate/src/array_agg.rs index 3b9a521ec972..39f8c88338b1 100644 --- a/datafusion/functions-aggregate/src/array_agg.rs +++ b/datafusion/functions-aggregate/src/array_agg.rs @@ -22,7 +22,7 @@ use arrow::datatypes::DataType; use arrow_schema::{Field, Fields}; use datafusion_common::cast::as_list_array; -use datafusion_common::utils::{array_into_list_array_nullable, get_row_at_idx}; +use datafusion_common::utils::{get_row_at_idx, SingleRowArrayBuilder}; use datafusion_common::{exec_err, ScalarValue}; use datafusion_common::{internal_err, Result}; use datafusion_expr::aggregate_doc_sections::DOC_SECTION_GENERAL; @@ -238,9 +238,8 @@ impl Accumulator for ArrayAggAccumulator { } let concated_array = arrow::compute::concat(&element_arrays)?; - let list_array = array_into_list_array_nullable(concated_array); - Ok(ScalarValue::List(Arc::new(list_array))) + Ok(SingleRowArrayBuilder::new(concated_array).build_list_scalar()) } fn size(&self) -> usize { @@ -530,9 +529,7 @@ impl OrderSensitiveArrayAggAccumulator { let ordering_array = StructArray::try_new(struct_field, column_wise_ordering_values, None)?; - Ok(ScalarValue::List(Arc::new(array_into_list_array_nullable( - Arc::new(ordering_array), - )))) + Ok(SingleRowArrayBuilder::new(Arc::new(ordering_array)).build_list_scalar()) } } diff --git a/datafusion/functions-aggregate/src/nth_value.rs b/datafusion/functions-aggregate/src/nth_value.rs index 0c72939633b1..a882a3c8e9d1 100644 --- a/datafusion/functions-aggregate/src/nth_value.rs +++ b/datafusion/functions-aggregate/src/nth_value.rs @@ -26,7 +26,7 @@ use std::sync::{Arc, OnceLock}; use arrow::array::{new_empty_array, ArrayRef, AsArray, StructArray}; use arrow_schema::{DataType, Field, Fields}; -use datafusion_common::utils::{array_into_list_array_nullable, get_row_at_idx}; +use datafusion_common::utils::{get_row_at_idx, SingleRowArrayBuilder}; use datafusion_common::{exec_err, internal_err, not_impl_err, Result, ScalarValue}; use datafusion_expr::aggregate_doc_sections::DOC_SECTION_STATISTICAL; use datafusion_expr::function::{AccumulatorArgs, StateFieldsArgs}; @@ -423,9 +423,7 @@ impl NthValueAccumulator { let ordering_array = StructArray::try_new(struct_field, column_wise_ordering_values, None)?; - Ok(ScalarValue::List(Arc::new(array_into_list_array_nullable( - Arc::new(ordering_array), - )))) + Ok(SingleRowArrayBuilder::new(Arc::new(ordering_array)).build_list_scalar()) } fn evaluate_values(&self) -> ScalarValue { diff --git a/datafusion/functions-nested/src/make_array.rs b/datafusion/functions-nested/src/make_array.rs index 825824a82d20..1027a20c3898 100644 --- a/datafusion/functions-nested/src/make_array.rs +++ b/datafusion/functions-nested/src/make_array.rs @@ -28,7 +28,8 @@ use arrow_array::{ use arrow_buffer::OffsetBuffer; use arrow_schema::DataType::{List, Null}; use arrow_schema::{DataType, Field}; -use datafusion_common::{plan_err, utils::array_into_list_array_nullable, Result}; +use datafusion_common::utils::SingleRowArrayBuilder; +use datafusion_common::{plan_err, Result}; use datafusion_expr::binary::{ try_type_union_resolution_with_struct, type_union_resolution, }; @@ -194,7 +195,9 @@ pub(crate) fn make_array_inner(arrays: &[ArrayRef]) -> Result { let length = arrays.iter().map(|a| a.len()).sum(); // By default Int64 let array = new_null_array(&DataType::Int64, length); - Ok(Arc::new(array_into_list_array_nullable(array))) + Ok(Arc::new( + SingleRowArrayBuilder::new(array).build_list_array(), + )) } _ => array_array::(arrays, data_type), } diff --git a/datafusion/functions-nested/src/utils.rs b/datafusion/functions-nested/src/utils.rs index b9a75724bcde..8381ac7a01e5 100644 --- a/datafusion/functions-nested/src/utils.rs +++ b/datafusion/functions-nested/src/utils.rs @@ -274,27 +274,27 @@ pub(crate) fn get_map_entry_field(data_type: &DataType) -> Result<&Fields> { mod tests { use super::*; use arrow::datatypes::Int64Type; - use datafusion_common::utils::array_into_list_array_nullable; + use datafusion_common::utils::SingleRowArrayBuilder; /// Only test internal functions, array-related sql functions will be tested in sqllogictest `array.slt` #[test] fn test_align_array_dimensions() { - let array1d_1 = + let array1d_1: ArrayRef = Arc::new(ListArray::from_iter_primitive::(vec![ Some(vec![Some(1), Some(2), Some(3)]), Some(vec![Some(4), Some(5)]), ])); - let array1d_2 = + let array1d_2: ArrayRef = Arc::new(ListArray::from_iter_primitive::(vec![ Some(vec![Some(6), Some(7), Some(8)]), ])); - let array2d_1 = Arc::new(array_into_list_array_nullable( - Arc::clone(&array1d_1) as ArrayRef - )) as ArrayRef; - let array2d_2 = Arc::new(array_into_list_array_nullable( - Arc::clone(&array1d_2) as ArrayRef - )) as ArrayRef; + let array2d_1: ArrayRef = Arc::new( + SingleRowArrayBuilder::new(Arc::clone(&array1d_1)).build_list_array(), + ); + let array2d_2 = Arc::new( + SingleRowArrayBuilder::new(Arc::clone(&array1d_2)).build_list_array(), + ); let res = align_array_dimensions::(vec![ array1d_1.to_owned(), @@ -310,10 +310,11 @@ mod tests { expected_dim ); - let array3d_1 = Arc::new(array_into_list_array_nullable(array2d_1)) as ArrayRef; - let array3d_2 = array_into_list_array_nullable(array2d_2.to_owned()); - let res = - align_array_dimensions::(vec![array1d_1, Arc::new(array3d_2)]).unwrap(); + let array3d_1: ArrayRef = + Arc::new(SingleRowArrayBuilder::new(array2d_1).build_list_array()); + let array3d_2: ArrayRef = + Arc::new(SingleRowArrayBuilder::new(array2d_2).build_list_array()); + let res = align_array_dimensions::(vec![array1d_1, array3d_2]).unwrap(); let expected = as_list_array(&array3d_1).unwrap(); let expected_dim = datafusion_common::utils::list_ndims(array3d_1.data_type()); From 284fa10cdcc6c62c51af4ca05a25c09e5167c216 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Mon, 2 Dec 2024 15:16:07 -0500 Subject: [PATCH 2/3] tweak --- datafusion/common/src/utils/mod.rs | 20 +++++--------------- 1 file changed, 5 insertions(+), 15 deletions(-) diff --git a/datafusion/common/src/utils/mod.rs b/datafusion/common/src/utils/mod.rs index e70cc8aa0b96..ebbb1f8be33a 100644 --- a/datafusion/common/src/utils/mod.rs +++ b/datafusion/common/src/utils/mod.rs @@ -322,7 +322,7 @@ pub fn longest_consecutive_prefix>( } /// Creates single element [`ListArray`], [`LargeListArray`] and -/// [`FixedListArray`] from other arrays +/// [`FixedSizeListArray`] from other arrays /// /// For example this builder can convert `[1, 2, 3]` into `[[1, 2, 3]]` /// @@ -483,13 +483,7 @@ pub fn array_into_fixed_size_list_array( arr: ArrayRef, list_size: usize, ) -> FixedSizeListArray { - let list_size = list_size as i32; - FixedSizeListArray::new( - Arc::new(Field::new_list_field(arr.data_type().to_owned(), true)), - list_size, - arr, - None, - ) + SingleRowArrayBuilder::new(arr).build_fixed_size_list_array(list_size) } #[deprecated(since = "44.0.0", note = "please use `SingleRowArrayBuilder` instead")] @@ -498,13 +492,9 @@ pub fn array_into_fixed_size_list_array_with_field_name( list_size: usize, field_name: &str, ) -> FixedSizeListArray { - let list_size = list_size as i32; - FixedSizeListArray::new( - Arc::new(Field::new(field_name, arr.data_type().to_owned(), true)), - list_size, - arr, - None, - ) + SingleRowArrayBuilder::new(arr) + .with_field_name(Some(field_name.to_string())) + .build_fixed_size_list_array(list_size) } /// Wrap arrays into a single element `ListArray`. From 505edc9cdb286d2905ab4740286958f9974e68ba Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Tue, 3 Dec 2024 09:31:29 -0500 Subject: [PATCH 3/3] rename SingleRowArrayBuilder --> SingleRowListArrayBuilder --- datafusion/common/src/scalar/mod.rs | 23 ++++---- datafusion/common/src/utils/mod.rs | 59 +++++++++++++------ .../src/aggregate/count_distinct/bytes.rs | 6 +- .../src/aggregate/count_distinct/native.rs | 6 +- .../functions-aggregate/src/array_agg.rs | 6 +- .../functions-aggregate/src/nth_value.rs | 4 +- datafusion/functions-nested/src/make_array.rs | 4 +- datafusion/functions-nested/src/utils.rs | 10 ++-- 8 files changed, 70 insertions(+), 48 deletions(-) diff --git a/datafusion/common/src/scalar/mod.rs b/datafusion/common/src/scalar/mod.rs index 9e3acb977e20..0cebedf905cd 100644 --- a/datafusion/common/src/scalar/mod.rs +++ b/datafusion/common/src/scalar/mod.rs @@ -39,7 +39,7 @@ use crate::cast::{ }; use crate::error::{DataFusionError, Result, _exec_err, _internal_err, _not_impl_err}; use crate::hash_utils::create_hashes; -use crate::utils::SingleRowArrayBuilder; +use crate::utils::SingleRowListArrayBuilder; use arrow::compute::kernels::numeric::*; use arrow::util::display::{array_value_to_string, ArrayFormatter, FormatOptions}; use arrow::{ @@ -2089,7 +2089,7 @@ impl ScalarValue { Self::iter_to_array(values.iter().cloned()).unwrap() }; Arc::new( - SingleRowArrayBuilder::new(values) + SingleRowListArrayBuilder::new(values) .with_nullable(nullable) .build_list_array(), ) @@ -2149,7 +2149,7 @@ impl ScalarValue { Self::iter_to_array(values).unwrap() }; Arc::new( - SingleRowArrayBuilder::new(values) + SingleRowListArrayBuilder::new(values) .with_nullable(nullable) .build_list_array(), ) @@ -2189,7 +2189,7 @@ impl ScalarValue { } else { Self::iter_to_array(values.iter().cloned()).unwrap() }; - Arc::new(SingleRowArrayBuilder::new(values).build_large_list_array()) + Arc::new(SingleRowListArrayBuilder::new(values).build_large_list_array()) } /// Converts a scalar value into an array of `size` rows. @@ -2669,7 +2669,7 @@ impl ScalarValue { let list_array = array.as_list::(); let nested_array = list_array.value(index); // Produces a single element `ListArray` with the value at `index`. - SingleRowArrayBuilder::new(nested_array) + SingleRowListArrayBuilder::new(nested_array) .with_field(field) .build_list_scalar() } @@ -2677,7 +2677,7 @@ impl ScalarValue { let list_array = as_large_list_array(array); let nested_array = list_array.value(index); // Produces a single element `LargeListArray` with the value at `index`. - SingleRowArrayBuilder::new(nested_array) + SingleRowListArrayBuilder::new(nested_array) .with_field(field) .build_large_list_scalar() } @@ -2687,7 +2687,7 @@ impl ScalarValue { let nested_array = list_array.value(index); // Produces a single element `FixedSizeListArray` with the value at `index`. let list_size = nested_array.len(); - SingleRowArrayBuilder::new(nested_array) + SingleRowListArrayBuilder::new(nested_array) .with_field(field) .build_fixed_size_list_scalar(list_size) } @@ -4068,7 +4068,8 @@ mod tests { } fn single_row_list_array(items: Vec<&str>) -> ListArray { - SingleRowArrayBuilder::new(Arc::new(StringArray::from(items))).build_list_array() + SingleRowListArrayBuilder::new(Arc::new(StringArray::from(items))) + .build_list_array() } fn build_list( @@ -5733,13 +5734,13 @@ mod tests { // Define list-of-structs scalars let nl0_array = ScalarValue::iter_to_array(vec![s0, s1.clone()]).unwrap(); - let nl0 = SingleRowArrayBuilder::new(nl0_array).build_list_scalar(); + let nl0 = SingleRowListArrayBuilder::new(nl0_array).build_list_scalar(); let nl1_array = ScalarValue::iter_to_array(vec![s2]).unwrap(); - let nl1 = SingleRowArrayBuilder::new(nl1_array).build_list_scalar(); + let nl1 = SingleRowListArrayBuilder::new(nl1_array).build_list_scalar(); let nl2_array = ScalarValue::iter_to_array(vec![s1]).unwrap(); - let nl2 = SingleRowArrayBuilder::new(nl2_array).build_list_scalar(); + let nl2 = SingleRowListArrayBuilder::new(nl2_array).build_list_scalar(); // iter_to_array for list-of-struct let array = ScalarValue::iter_to_array(vec![nl0, nl1, nl2]).unwrap(); diff --git a/datafusion/common/src/utils/mod.rs b/datafusion/common/src/utils/mod.rs index ebbb1f8be33a..5e840f859400 100644 --- a/datafusion/common/src/utils/mod.rs +++ b/datafusion/common/src/utils/mod.rs @@ -331,17 +331,17 @@ pub fn longest_consecutive_prefix>( /// # use std::sync::Arc; /// # use arrow_array::{Array, ListArray}; /// # use arrow_array::types::Int64Type; -/// # use datafusion_common::utils::SingleRowArrayBuilder; +/// # use datafusion_common::utils::SingleRowListArrayBuilder; /// // Array is [1, 2, 3] /// let arr = ListArray::from_iter_primitive::(vec![ /// Some(vec![Some(1), Some(2), Some(3)]), /// ]); /// // Wrap as a list array: [[1, 2, 3]] -/// let list_arr = SingleRowArrayBuilder::new(Arc::new(arr)).build_list_array(); +/// let list_arr = SingleRowListArrayBuilder::new(Arc::new(arr)).build_list_array(); /// assert_eq!(list_arr.len(), 1); /// ``` #[derive(Debug, Clone)] -pub struct SingleRowArrayBuilder { +pub struct SingleRowListArrayBuilder { /// array to be wrapped arr: ArrayRef, /// Should the resulting array be nullable? Defaults to `true`. @@ -351,8 +351,8 @@ pub struct SingleRowArrayBuilder { field_name: Option, } -impl SingleRowArrayBuilder { - /// Create a new instance of `SingleRowArrayBuilder` +impl SingleRowListArrayBuilder { + /// Create a new instance of [`SingleRowListArrayBuilder`] pub fn new(arr: ArrayRef) -> Self { Self { arr, @@ -433,29 +433,38 @@ impl SingleRowArrayBuilder { /// Wrap an array into a single element `ListArray`. /// For example `[1, 2, 3]` would be converted into `[[1, 2, 3]]` /// The field in the list array is nullable. -#[deprecated(since = "44.0.0", note = "please use `SingleRowArrayBuilder` instead")] +#[deprecated( + since = "44.0.0", + note = "please use `SingleRowListArrayBuilder` instead" +)] pub fn array_into_list_array_nullable(arr: ArrayRef) -> ListArray { - SingleRowArrayBuilder::new(arr) + SingleRowListArrayBuilder::new(arr) .with_nullable(true) .build_list_array() } /// Wrap an array into a single element `ListArray`. /// For example `[1, 2, 3]` would be converted into `[[1, 2, 3]]` -#[deprecated(since = "44.0.0", note = "please use `SingleRowArrayBuilder` instead")] +#[deprecated( + since = "44.0.0", + note = "please use `SingleRowListArrayBuilder` instead" +)] pub fn array_into_list_array(arr: ArrayRef, nullable: bool) -> ListArray { - SingleRowArrayBuilder::new(arr) + SingleRowListArrayBuilder::new(arr) .with_nullable(nullable) .build_list_array() } -#[deprecated(since = "44.0.0", note = "please use `SingleRowArrayBuilder` instead")] +#[deprecated( + since = "44.0.0", + note = "please use `SingleRowListArrayBuilder` instead" +)] pub fn array_into_list_array_with_field_name( arr: ArrayRef, nullable: bool, field_name: &str, ) -> ListArray { - SingleRowArrayBuilder::new(arr) + SingleRowListArrayBuilder::new(arr) .with_nullable(nullable) .with_field_name(Some(field_name.to_string())) .build_list_array() @@ -463,36 +472,48 @@ pub fn array_into_list_array_with_field_name( /// Wrap an array into a single element `LargeListArray`. /// For example `[1, 2, 3]` would be converted into `[[1, 2, 3]]` -#[deprecated(since = "44.0.0", note = "please use `SingleRowArrayBuilder` instead")] +#[deprecated( + since = "44.0.0", + note = "please use `SingleRowListArrayBuilder` instead" +)] pub fn array_into_large_list_array(arr: ArrayRef) -> LargeListArray { - SingleRowArrayBuilder::new(arr).build_large_list_array() + SingleRowListArrayBuilder::new(arr).build_large_list_array() } -#[deprecated(since = "44.0.0", note = "please use `SingleRowArrayBuilder` instead")] +#[deprecated( + since = "44.0.0", + note = "please use `SingleRowListArrayBuilder` instead" +)] pub fn array_into_large_list_array_with_field_name( arr: ArrayRef, field_name: &str, ) -> LargeListArray { - SingleRowArrayBuilder::new(arr) + SingleRowListArrayBuilder::new(arr) .with_field_name(Some(field_name.to_string())) .build_large_list_array() } -#[deprecated(since = "44.0.0", note = "please use `SingleRowArrayBuilder` instead")] +#[deprecated( + since = "44.0.0", + note = "please use `SingleRowListArrayBuilder` instead" +)] pub fn array_into_fixed_size_list_array( arr: ArrayRef, list_size: usize, ) -> FixedSizeListArray { - SingleRowArrayBuilder::new(arr).build_fixed_size_list_array(list_size) + SingleRowListArrayBuilder::new(arr).build_fixed_size_list_array(list_size) } -#[deprecated(since = "44.0.0", note = "please use `SingleRowArrayBuilder` instead")] +#[deprecated( + since = "44.0.0", + note = "please use `SingleRowListArrayBuilder` instead" +)] pub fn array_into_fixed_size_list_array_with_field_name( arr: ArrayRef, list_size: usize, field_name: &str, ) -> FixedSizeListArray { - SingleRowArrayBuilder::new(arr) + SingleRowListArrayBuilder::new(arr) .with_field_name(Some(field_name.to_string())) .build_fixed_size_list_array(list_size) } diff --git a/datafusion/functions-aggregate-common/src/aggregate/count_distinct/bytes.rs b/datafusion/functions-aggregate-common/src/aggregate/count_distinct/bytes.rs index b8b2db08e591..e321df61ddc6 100644 --- a/datafusion/functions-aggregate-common/src/aggregate/count_distinct/bytes.rs +++ b/datafusion/functions-aggregate-common/src/aggregate/count_distinct/bytes.rs @@ -19,7 +19,7 @@ use arrow::array::{ArrayRef, OffsetSizeTrait}; use datafusion_common::cast::as_list_array; -use datafusion_common::utils::SingleRowArrayBuilder; +use datafusion_common::utils::SingleRowListArrayBuilder; use datafusion_common::ScalarValue; use datafusion_expr_common::accumulator::Accumulator; use datafusion_physical_expr_common::binary_map::{ArrowBytesSet, OutputType}; @@ -48,7 +48,7 @@ impl Accumulator for BytesDistinctCountAccumulator { fn state(&mut self) -> datafusion_common::Result> { let set = self.0.take(); let arr = set.into_state(); - Ok(vec![SingleRowArrayBuilder::new(arr).build_list_scalar()]) + Ok(vec![SingleRowListArrayBuilder::new(arr).build_list_scalar()]) } fn update_batch(&mut self, values: &[ArrayRef]) -> datafusion_common::Result<()> { @@ -107,7 +107,7 @@ impl Accumulator for BytesViewDistinctCountAccumulator { fn state(&mut self) -> datafusion_common::Result> { let set = self.0.take(); let arr = set.into_state(); - Ok(vec![SingleRowArrayBuilder::new(arr).build_list_scalar()]) + Ok(vec![SingleRowListArrayBuilder::new(arr).build_list_scalar()]) } fn update_batch(&mut self, values: &[ArrayRef]) -> datafusion_common::Result<()> { diff --git a/datafusion/functions-aggregate-common/src/aggregate/count_distinct/native.rs b/datafusion/functions-aggregate-common/src/aggregate/count_distinct/native.rs index c8849168dcd2..e8b6588dc091 100644 --- a/datafusion/functions-aggregate-common/src/aggregate/count_distinct/native.rs +++ b/datafusion/functions-aggregate-common/src/aggregate/count_distinct/native.rs @@ -34,7 +34,7 @@ use arrow::datatypes::DataType; use datafusion_common::cast::{as_list_array, as_primitive_array}; use datafusion_common::utils::memory::estimate_memory_size; -use datafusion_common::utils::SingleRowArrayBuilder; +use datafusion_common::utils::SingleRowListArrayBuilder; use datafusion_common::ScalarValue; use datafusion_expr_common::accumulator::Accumulator; @@ -73,7 +73,7 @@ where PrimitiveArray::::from_iter_values(self.values.iter().cloned()) .with_data_type(self.data_type.clone()), ); - Ok(vec![SingleRowArrayBuilder::new(arr).build_list_scalar()]) + Ok(vec![SingleRowListArrayBuilder::new(arr).build_list_scalar()]) } fn update_batch(&mut self, values: &[ArrayRef]) -> datafusion_common::Result<()> { @@ -159,7 +159,7 @@ where let arr = Arc::new(PrimitiveArray::::from_iter_values( self.values.iter().map(|v| v.0), )) as ArrayRef; - Ok(vec![SingleRowArrayBuilder::new(arr).build_list_scalar()]) + Ok(vec![SingleRowListArrayBuilder::new(arr).build_list_scalar()]) } fn update_batch(&mut self, values: &[ArrayRef]) -> datafusion_common::Result<()> { diff --git a/datafusion/functions-aggregate/src/array_agg.rs b/datafusion/functions-aggregate/src/array_agg.rs index 39f8c88338b1..bf47ce16a2da 100644 --- a/datafusion/functions-aggregate/src/array_agg.rs +++ b/datafusion/functions-aggregate/src/array_agg.rs @@ -22,7 +22,7 @@ use arrow::datatypes::DataType; use arrow_schema::{Field, Fields}; use datafusion_common::cast::as_list_array; -use datafusion_common::utils::{get_row_at_idx, SingleRowArrayBuilder}; +use datafusion_common::utils::{get_row_at_idx, SingleRowListArrayBuilder}; use datafusion_common::{exec_err, ScalarValue}; use datafusion_common::{internal_err, Result}; use datafusion_expr::aggregate_doc_sections::DOC_SECTION_GENERAL; @@ -239,7 +239,7 @@ impl Accumulator for ArrayAggAccumulator { let concated_array = arrow::compute::concat(&element_arrays)?; - Ok(SingleRowArrayBuilder::new(concated_array).build_list_scalar()) + Ok(SingleRowListArrayBuilder::new(concated_array).build_list_scalar()) } fn size(&self) -> usize { @@ -529,7 +529,7 @@ impl OrderSensitiveArrayAggAccumulator { let ordering_array = StructArray::try_new(struct_field, column_wise_ordering_values, None)?; - Ok(SingleRowArrayBuilder::new(Arc::new(ordering_array)).build_list_scalar()) + Ok(SingleRowListArrayBuilder::new(Arc::new(ordering_array)).build_list_scalar()) } } diff --git a/datafusion/functions-aggregate/src/nth_value.rs b/datafusion/functions-aggregate/src/nth_value.rs index a882a3c8e9d1..1a37dbe3004f 100644 --- a/datafusion/functions-aggregate/src/nth_value.rs +++ b/datafusion/functions-aggregate/src/nth_value.rs @@ -26,7 +26,7 @@ use std::sync::{Arc, OnceLock}; use arrow::array::{new_empty_array, ArrayRef, AsArray, StructArray}; use arrow_schema::{DataType, Field, Fields}; -use datafusion_common::utils::{get_row_at_idx, SingleRowArrayBuilder}; +use datafusion_common::utils::{get_row_at_idx, SingleRowListArrayBuilder}; use datafusion_common::{exec_err, internal_err, not_impl_err, Result, ScalarValue}; use datafusion_expr::aggregate_doc_sections::DOC_SECTION_STATISTICAL; use datafusion_expr::function::{AccumulatorArgs, StateFieldsArgs}; @@ -423,7 +423,7 @@ impl NthValueAccumulator { let ordering_array = StructArray::try_new(struct_field, column_wise_ordering_values, None)?; - Ok(SingleRowArrayBuilder::new(Arc::new(ordering_array)).build_list_scalar()) + Ok(SingleRowListArrayBuilder::new(Arc::new(ordering_array)).build_list_scalar()) } fn evaluate_values(&self) -> ScalarValue { diff --git a/datafusion/functions-nested/src/make_array.rs b/datafusion/functions-nested/src/make_array.rs index 1027a20c3898..75064172e77d 100644 --- a/datafusion/functions-nested/src/make_array.rs +++ b/datafusion/functions-nested/src/make_array.rs @@ -28,7 +28,7 @@ use arrow_array::{ use arrow_buffer::OffsetBuffer; use arrow_schema::DataType::{List, Null}; use arrow_schema::{DataType, Field}; -use datafusion_common::utils::SingleRowArrayBuilder; +use datafusion_common::utils::SingleRowListArrayBuilder; use datafusion_common::{plan_err, Result}; use datafusion_expr::binary::{ try_type_union_resolution_with_struct, type_union_resolution, @@ -196,7 +196,7 @@ pub(crate) fn make_array_inner(arrays: &[ArrayRef]) -> Result { // By default Int64 let array = new_null_array(&DataType::Int64, length); Ok(Arc::new( - SingleRowArrayBuilder::new(array).build_list_array(), + SingleRowListArrayBuilder::new(array).build_list_array(), )) } _ => array_array::(arrays, data_type), diff --git a/datafusion/functions-nested/src/utils.rs b/datafusion/functions-nested/src/utils.rs index 8381ac7a01e5..1364e266ce9d 100644 --- a/datafusion/functions-nested/src/utils.rs +++ b/datafusion/functions-nested/src/utils.rs @@ -274,7 +274,7 @@ pub(crate) fn get_map_entry_field(data_type: &DataType) -> Result<&Fields> { mod tests { use super::*; use arrow::datatypes::Int64Type; - use datafusion_common::utils::SingleRowArrayBuilder; + use datafusion_common::utils::SingleRowListArrayBuilder; /// Only test internal functions, array-related sql functions will be tested in sqllogictest `array.slt` #[test] @@ -290,10 +290,10 @@ mod tests { ])); let array2d_1: ArrayRef = Arc::new( - SingleRowArrayBuilder::new(Arc::clone(&array1d_1)).build_list_array(), + SingleRowListArrayBuilder::new(Arc::clone(&array1d_1)).build_list_array(), ); let array2d_2 = Arc::new( - SingleRowArrayBuilder::new(Arc::clone(&array1d_2)).build_list_array(), + SingleRowListArrayBuilder::new(Arc::clone(&array1d_2)).build_list_array(), ); let res = align_array_dimensions::(vec![ @@ -311,9 +311,9 @@ mod tests { ); let array3d_1: ArrayRef = - Arc::new(SingleRowArrayBuilder::new(array2d_1).build_list_array()); + Arc::new(SingleRowListArrayBuilder::new(array2d_1).build_list_array()); let array3d_2: ArrayRef = - Arc::new(SingleRowArrayBuilder::new(array2d_2).build_list_array()); + Arc::new(SingleRowListArrayBuilder::new(array2d_2).build_list_array()); let res = align_array_dimensions::(vec![array1d_1, array3d_2]).unwrap(); let expected = as_list_array(&array3d_1).unwrap();