Skip to content

Commit

Permalink
perf: Extend functionality on BitmapBuilder and use in Growables (#20754)
Browse files Browse the repository at this point in the history
  • Loading branch information
orlp authored Jan 16, 2025
1 parent acb20ee commit 725c960
Show file tree
Hide file tree
Showing 12 changed files with 194 additions and 64 deletions.
8 changes: 4 additions & 4 deletions crates/polars-arrow/src/array/growable/binary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,15 @@ use super::utils::extend_offset_values;
use super::Growable;
use crate::array::growable::utils::{extend_validity, prepare_validity};
use crate::array::{Array, BinaryArray};
use crate::bitmap::MutableBitmap;
use crate::bitmap::BitmapBuilder;
use crate::datatypes::ArrowDataType;
use crate::offset::{Offset, Offsets};

/// Concrete [`Growable`] for the [`BinaryArray`].
pub struct GrowableBinary<'a, O: Offset> {
arrays: Vec<&'a BinaryArray<O>>,
dtype: ArrowDataType,
validity: Option<MutableBitmap>,
validity: Option<BitmapBuilder>,
values: Vec<u8>,
offsets: Offsets<O>,
}
Expand Down Expand Up @@ -49,7 +49,7 @@ impl<'a, O: Offset> GrowableBinary<'a, O> {
dtype,
offsets.into(),
values.into(),
validity.map(|v| v.into()),
validity.map(|v| v.freeze()),
)
}
}
Expand Down Expand Up @@ -97,7 +97,7 @@ impl<'a, O: Offset> From<GrowableBinary<'a, O>> for BinaryArray<O> {
val.dtype,
val.offsets.into(),
val.values.into(),
val.validity.map(|v| v.into()),
val.validity.map(|v| v.freeze()),
)
}
}
8 changes: 4 additions & 4 deletions crates/polars-arrow/src/array/growable/binview.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,15 +8,15 @@ use super::Growable;
use crate::array::binview::{BinaryViewArrayGeneric, ViewType};
use crate::array::growable::utils::{extend_validity, extend_validity_copies, prepare_validity};
use crate::array::{Array, MutableBinaryViewArray, View};
use crate::bitmap::{Bitmap, MutableBitmap};
use crate::bitmap::BitmapBuilder;
use crate::buffer::Buffer;
use crate::datatypes::ArrowDataType;

/// Concrete [`Growable`] for the [`BinaryViewArrayGeneric`].
pub struct GrowableBinaryViewArray<'a, T: ViewType + ?Sized> {
arrays: Vec<&'a BinaryViewArrayGeneric<T>>,
dtype: ArrowDataType,
validity: Option<MutableBitmap>,
validity: Option<BitmapBuilder>,
inner: MutableBinaryViewArray<T>,
same_buffers: Option<&'a Arc<[Buffer<u8>]>>,
total_same_buffers_len: usize, // Only valid if same_buffers is Some.
Expand Down Expand Up @@ -81,14 +81,14 @@ impl<'a, T: ViewType + ?Sized> GrowableBinaryViewArray<'a, T> {
self.dtype.clone(),
arr.views.into(),
buffers.clone(),
self.validity.take().map(Bitmap::from),
self.validity.take().map(BitmapBuilder::freeze),
arr.total_bytes_len,
self.total_same_buffers_len,
)
}
} else {
arr.freeze_with_dtype(self.dtype.clone())
.with_validity(self.validity.take().map(Bitmap::from))
.with_validity(self.validity.take().map(BitmapBuilder::freeze))
}
}
}
Expand Down
24 changes: 12 additions & 12 deletions crates/polars-arrow/src/array/growable/boolean.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,15 @@ use std::sync::Arc;
use super::Growable;
use crate::array::growable::utils::{extend_validity, prepare_validity};
use crate::array::{Array, BooleanArray};
use crate::bitmap::MutableBitmap;
use crate::bitmap::BitmapBuilder;
use crate::datatypes::ArrowDataType;

/// Concrete [`Growable`] for the [`BooleanArray`].
pub struct GrowableBoolean<'a> {
arrays: Vec<&'a BooleanArray>,
dtype: ArrowDataType,
validity: Option<MutableBitmap>,
values: MutableBitmap,
validity: Option<BitmapBuilder>,
values: BitmapBuilder,
}

impl<'a> GrowableBoolean<'a> {
Expand All @@ -30,7 +30,7 @@ impl<'a> GrowableBoolean<'a> {
Self {
arrays,
dtype,
values: MutableBitmap::with_capacity(capacity),
values: BitmapBuilder::with_capacity(capacity),
validity: prepare_validity(use_validity, capacity),
}
}
Expand All @@ -41,8 +41,8 @@ impl<'a> GrowableBoolean<'a> {

BooleanArray::new(
self.dtype.clone(),
values.into(),
validity.map(|v| v.into()),
values.freeze(),
validity.map(|v| v.freeze()),
)
}
}
Expand All @@ -55,11 +55,7 @@ impl<'a> Growable<'a> for GrowableBoolean<'a> {
let values = array.values();

let (slice, offset, _) = values.as_slice();
// SAFETY: invariant offset + length <= slice.len()
unsafe {
self.values
.extend_from_slice_unchecked(slice, start + offset, len);
}
self.values.extend_from_slice(slice, start + offset, len);
}

fn extend_validity(&mut self, additional: usize) {
Expand All @@ -85,6 +81,10 @@ impl<'a> Growable<'a> for GrowableBoolean<'a> {

impl<'a> From<GrowableBoolean<'a>> for BooleanArray {
fn from(val: GrowableBoolean<'a>) -> Self {
BooleanArray::new(val.dtype, val.values.into(), val.validity.map(|v| v.into()))
BooleanArray::new(
val.dtype,
val.values.freeze(),
val.validity.map(|v| v.freeze()),
)
}
}
6 changes: 3 additions & 3 deletions crates/polars-arrow/src/array/growable/dictionary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ use std::sync::Arc;
use super::{make_growable, Growable};
use crate::array::growable::utils::{extend_validity, prepare_validity};
use crate::array::{Array, DictionaryArray, DictionaryKey, PrimitiveArray};
use crate::bitmap::MutableBitmap;
use crate::bitmap::BitmapBuilder;
use crate::datatypes::ArrowDataType;

/// Concrete [`Growable`] for the [`DictionaryArray`].
Expand All @@ -14,7 +14,7 @@ pub struct GrowableDictionary<'a, K: DictionaryKey> {
dtype: ArrowDataType,
keys: Vec<&'a PrimitiveArray<K>>,
key_values: Vec<K>,
validity: Option<MutableBitmap>,
validity: Option<BitmapBuilder>,
offsets: Vec<usize>,
values: Box<dyn Array>,
}
Expand Down Expand Up @@ -77,7 +77,7 @@ impl<'a, T: DictionaryKey> GrowableDictionary<'a, T> {
let keys = PrimitiveArray::<T>::new(
T::PRIMITIVE.into(),
key_values.into(),
validity.map(|v| v.into()),
validity.map(|v| v.freeze()),
);

// SAFETY: the invariant of this struct ensures that this is up-held
Expand Down
8 changes: 4 additions & 4 deletions crates/polars-arrow/src/array/growable/fixed_binary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,12 @@ use std::sync::Arc;
use super::Growable;
use crate::array::growable::utils::{extend_validity, prepare_validity};
use crate::array::{Array, FixedSizeBinaryArray};
use crate::bitmap::MutableBitmap;
use crate::bitmap::BitmapBuilder;

/// Concrete [`Growable`] for the [`FixedSizeBinaryArray`].
pub struct GrowableFixedSizeBinary<'a> {
arrays: Vec<&'a FixedSizeBinaryArray>,
validity: Option<MutableBitmap>,
validity: Option<BitmapBuilder>,
values: Vec<u8>,
size: usize, // just a cache
}
Expand Down Expand Up @@ -44,7 +44,7 @@ impl<'a> GrowableFixedSizeBinary<'a> {
FixedSizeBinaryArray::new(
self.arrays[0].dtype().clone(),
values.into(),
validity.map(|v| v.into()),
validity.map(|v| v.freeze()),
)
}
}
Expand Down Expand Up @@ -88,7 +88,7 @@ impl<'a> From<GrowableFixedSizeBinary<'a>> for FixedSizeBinaryArray {
FixedSizeBinaryArray::new(
val.arrays[0].dtype().clone(),
val.values.into(),
val.validity.map(|v| v.into()),
val.validity.map(|v| v.freeze()),
)
}
}
8 changes: 4 additions & 4 deletions crates/polars-arrow/src/array/growable/fixed_size_list.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,12 @@ use std::sync::Arc;
use super::{make_growable, Growable};
use crate::array::growable::utils::{extend_validity, extend_validity_copies, prepare_validity};
use crate::array::{Array, FixedSizeListArray};
use crate::bitmap::MutableBitmap;
use crate::bitmap::BitmapBuilder;

/// Concrete [`Growable`] for the [`FixedSizeListArray`].
pub struct GrowableFixedSizeList<'a> {
arrays: Vec<&'a FixedSizeListArray>,
validity: Option<MutableBitmap>,
validity: Option<BitmapBuilder>,
values: Box<dyn Growable<'a> + 'a>,
size: usize,
length: usize,
Expand Down Expand Up @@ -61,7 +61,7 @@ impl<'a> GrowableFixedSizeList<'a> {
self.arrays[0].dtype().clone(),
self.length,
values,
validity.map(|v| v.into()),
validity.map(|v| v.freeze()),
)
}
}
Expand Down Expand Up @@ -122,7 +122,7 @@ impl<'a> From<GrowableFixedSizeList<'a>> for FixedSizeListArray {
val.arrays[0].dtype().clone(),
val.length,
values,
val.validity.map(|v| v.into()),
val.validity.map(|v| v.freeze()),
)
}
}
6 changes: 3 additions & 3 deletions crates/polars-arrow/src/array/growable/list.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ use std::sync::Arc;
use super::{make_growable, Growable};
use crate::array::growable::utils::{extend_validity, prepare_validity};
use crate::array::{Array, ListArray};
use crate::bitmap::MutableBitmap;
use crate::bitmap::BitmapBuilder;
use crate::offset::{Offset, Offsets};

unsafe fn extend_offset_values<O: Offset>(
Expand All @@ -29,7 +29,7 @@ unsafe fn extend_offset_values<O: Offset>(
/// Concrete [`Growable`] for the [`ListArray`].
pub struct GrowableList<'a, O: Offset> {
arrays: Vec<&'a ListArray<O>>,
validity: Option<MutableBitmap>,
validity: Option<BitmapBuilder>,
values: Box<dyn Growable<'a> + 'a>,
offsets: Offsets<O>,
}
Expand Down Expand Up @@ -68,7 +68,7 @@ impl<'a, O: Offset> GrowableList<'a, O> {
self.arrays[0].dtype().clone(),
offsets.into(),
values,
validity.map(|v| v.into()),
validity.map(|v| v.freeze()),
)
}
}
Expand Down
12 changes: 8 additions & 4 deletions crates/polars-arrow/src/array/growable/primitive.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,15 @@ use std::sync::Arc;
use super::Growable;
use crate::array::growable::utils::{extend_validity, extend_validity_copies, prepare_validity};
use crate::array::{Array, PrimitiveArray};
use crate::bitmap::MutableBitmap;
use crate::bitmap::BitmapBuilder;
use crate::datatypes::ArrowDataType;
use crate::types::NativeType;

/// Concrete [`Growable`] for the [`PrimitiveArray`].
pub struct GrowablePrimitive<'a, T: NativeType> {
dtype: ArrowDataType,
arrays: Vec<&'a PrimitiveArray<T>>,
validity: Option<MutableBitmap>,
validity: Option<BitmapBuilder>,
values: Vec<T>,
}

Expand Down Expand Up @@ -48,7 +48,7 @@ impl<'a, T: NativeType> GrowablePrimitive<'a, T> {
PrimitiveArray::<T>::new(
self.dtype.clone(),
values.into(),
validity.map(|v| v.into()),
validity.map(|v| v.freeze()),
)
}
}
Expand Down Expand Up @@ -105,6 +105,10 @@ impl<'a, T: NativeType> Growable<'a> for GrowablePrimitive<'a, T> {
impl<'a, T: NativeType> From<GrowablePrimitive<'a, T>> for PrimitiveArray<T> {
#[inline]
fn from(val: GrowablePrimitive<'a, T>) -> Self {
PrimitiveArray::<T>::new(val.dtype, val.values.into(), val.validity.map(|v| v.into()))
PrimitiveArray::<T>::new(
val.dtype,
val.values.into(),
val.validity.map(|v| v.freeze()),
)
}
}
8 changes: 4 additions & 4 deletions crates/polars-arrow/src/array/growable/structure.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,13 @@ use std::sync::Arc;
use super::{make_growable, Growable};
use crate::array::growable::utils::{extend_validity, prepare_validity};
use crate::array::{Array, StructArray};
use crate::bitmap::MutableBitmap;
use crate::bitmap::BitmapBuilder;

/// Concrete [`Growable`] for the [`StructArray`].
pub struct GrowableStruct<'a> {
arrays: Vec<&'a StructArray>,
length: usize,
validity: Option<MutableBitmap>,
validity: Option<BitmapBuilder>,
values: Vec<Box<dyn Growable<'a> + 'a>>,
}

Expand Down Expand Up @@ -62,7 +62,7 @@ impl<'a> GrowableStruct<'a> {
self.arrays[0].dtype().clone(),
self.length,
values,
validity.map(|v| v.into()),
validity.map(|v| v.freeze()),
)
}
}
Expand Down Expand Up @@ -129,7 +129,7 @@ impl<'a> From<GrowableStruct<'a>> for StructArray {
val.arrays[0].dtype().clone(),
val.length,
values,
val.validity.map(|v| v.into()),
val.validity.map(|v| v.freeze()),
)
}
}
6 changes: 3 additions & 3 deletions crates/polars-arrow/src/array/growable/utf8.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,13 @@ use super::utils::extend_offset_values;
use super::Growable;
use crate::array::growable::utils::{extend_validity, prepare_validity};
use crate::array::{Array, Utf8Array};
use crate::bitmap::MutableBitmap;
use crate::bitmap::BitmapBuilder;
use crate::offset::{Offset, Offsets};

/// Concrete [`Growable`] for the [`Utf8Array`].
pub struct GrowableUtf8<'a, O: Offset> {
arrays: Vec<&'a Utf8Array<O>>,
validity: Option<MutableBitmap>,
validity: Option<BitmapBuilder>,
values: Vec<u8>,
offsets: Offsets<O>,
}
Expand Down Expand Up @@ -49,7 +49,7 @@ impl<'a, O: Offset> GrowableUtf8<'a, O> {
self.arrays[0].dtype().clone(),
offsets.into(),
values.into(),
validity.map(|v| v.into()),
validity.map(|v| v.freeze()),
)
}
}
Expand Down
Loading

0 comments on commit 725c960

Please sign in to comment.