Skip to content
This repository has been archived by the owner on Feb 18, 2024. It is now read-only.

Commit

Permalink
Added BinaryArray::into_mut and double-ended support for its iterat…
Browse files Browse the repository at this point in the history
…or (#1255)
  • Loading branch information
ozgrakkurt authored Sep 27, 2022
1 parent 4514cfc commit 22f5568
Show file tree
Hide file tree
Showing 6 changed files with 261 additions and 16 deletions.
45 changes: 36 additions & 9 deletions src/array/binary/iterator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,17 @@ use super::BinaryArray;
pub struct BinaryValueIter<'a, O: Offset> {
/// The array whose values are yielded.
array: &'a BinaryArray<O>,
/// Position of the next value yielded from the front.
index: usize,
/// One past the position of the next value yielded from the back;
/// iteration is finished once `index == end`.
end: usize,
}

impl<'a, O: Offset> BinaryValueIter<'a, O> {
/// Creates a new [`BinaryValueIter`]
pub fn new(array: &'a BinaryArray<O>) -> Self {
Self { array, index: 0 }
Self {
array,
index: 0,
end: array.len(),
}
}
}

Expand All @@ -21,19 +26,41 @@ impl<'a, O: Offset> Iterator for BinaryValueIter<'a, O> {

#[inline]
fn next(&mut self) -> Option<Self::Item> {
if self.index >= self.array.len() {
if self.index == self.end {
return None;
} else {
self.index += 1;
}
Some(unsafe { self.array.value_unchecked(self.index - 1) })
let old = self.index;
self.index += 1;
Some(unsafe { self.array.value_unchecked(old) })
}

#[inline]
fn size_hint(&self) -> (usize, Option<usize>) {
(
self.array.len() - self.index,
Some(self.array.len() - self.index),
)
(self.end - self.index, Some(self.end - self.index))
}

/// Skips `n` values from the front and returns the one that follows them.
///
/// Returns `None` and leaves the iterator exhausted when fewer than `n + 1`
/// values remain.
#[inline]
fn nth(&mut self, n: usize) -> Option<Self::Item> {
    // `saturating_add` instead of `+`: a huge `n` must not overflow. With a
    // wrapping add (release builds) the result could land *below* `end` and
    // yield the wrong element; with a checked add (debug builds) it panics.
    let new_index = self.index.saturating_add(n);
    if new_index > self.end {
        // Skipped past the back cursor: mark the iterator as exhausted.
        self.index = self.end;
        None
    } else {
        // `new_index == end` is handled by `next`, which is expected to
        // return `None` once both cursors meet.
        self.index = new_index;
        self.next()
    }
}
}

impl<'a, O: Offset> DoubleEndedIterator for BinaryValueIter<'a, O> {
    /// Yields the last value that has not been consumed yet, moving the back
    /// cursor one step towards the front.
    #[inline]
    fn next_back(&mut self) -> Option<Self::Item> {
        // Front and back cursors have met: nothing is left to yield.
        if self.index == self.end {
            return None;
        }
        self.end -= 1;
        let last = self.end;
        // SAFETY: `last` is below the previous `end`, which starts at
        // `array.len()` and only ever shrinks, so it is an in-bounds position
        // (presumably what `value_unchecked` requires).
        Some(unsafe { self.array.value_unchecked(last) })
    }
}

Expand Down
84 changes: 84 additions & 0 deletions src/array/binary/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ use crate::{
trusted_len::TrustedLen,
};

use either::Either;

use super::{
specification::{try_check_offsets, try_check_offsets_bounds},
Array, GenericBinaryArray, Offset,
Expand Down Expand Up @@ -238,6 +240,88 @@ impl<O: Offset> BinaryArray<O> {
self.validity = validity;
}

/// Attempts to convert this [`BinaryArray`] into a [`MutableBinaryArray`].
///
/// `Right` is returned only when the validity (if present), the values and the
/// offsets can all be obtained mutably. In every other case the parts are put
/// back together and the immutable array is handed back as `Left`.
pub fn into_mut(mut self) -> Either<Self, MutableBinaryArray<O>> {
    use Either::*;
    match self.validity {
        None => {
            let taken_values = self.values.get_mut().map(std::mem::take);
            let taken_offsets = self.offsets.get_mut().map(std::mem::take);
            match (taken_values, taken_offsets) {
                (Some(values), Some(offsets)) => Right(unsafe {
                    MutableBinaryArray::from_data(self.data_type, offsets, values, None)
                }),
                // SAFETY (all `Left` arms): the parts come from this
                // already-valid array, so its invariants are preserved.
                (Some(values), None) => Left(unsafe {
                    BinaryArray::new_unchecked(self.data_type, self.offsets, values.into(), None)
                }),
                (None, Some(offsets)) => Left(unsafe {
                    BinaryArray::new_unchecked(self.data_type, offsets.into(), self.values, None)
                }),
                (None, None) => Left(unsafe {
                    BinaryArray::new_unchecked(self.data_type, self.offsets, self.values, None)
                }),
            }
        }
        Some(bitmap) => match bitmap.into_mut() {
            Right(mutable_validity) => {
                // The validity is mutable; now try the two data buffers.
                let taken_values = self.values.get_mut().map(std::mem::take);
                let taken_offsets = self.offsets.get_mut().map(std::mem::take);
                match (taken_values, taken_offsets) {
                    (Some(values), Some(offsets)) => Right(unsafe {
                        MutableBinaryArray::from_data(
                            self.data_type,
                            offsets,
                            values,
                            Some(mutable_validity),
                        )
                    }),
                    // SAFETY (all `Left` arms): invariants are preserved, the
                    // parts are merely converted back to their immutable form.
                    (Some(values), None) => Left(unsafe {
                        BinaryArray::new_unchecked(
                            self.data_type,
                            self.offsets,
                            values.into(),
                            Some(mutable_validity.into()),
                        )
                    }),
                    (None, Some(offsets)) => Left(unsafe {
                        BinaryArray::new_unchecked(
                            self.data_type,
                            offsets.into(),
                            self.values,
                            Some(mutable_validity.into()),
                        )
                    }),
                    (None, None) => Left(unsafe {
                        BinaryArray::new_unchecked(
                            self.data_type,
                            self.offsets,
                            self.values,
                            Some(mutable_validity.into()),
                        )
                    }),
                }
            }
            // The validity could not be made mutable: give the array back
            // without touching values or offsets.
            // SAFETY: invariants are preserved, nothing was modified.
            Left(bitmap) => Left(unsafe {
                BinaryArray::new_unchecked(
                    self.data_type,
                    self.offsets,
                    self.values,
                    Some(bitmap),
                )
            }),
        },
    }
}

/// Creates an empty [`BinaryArray`], i.e. whose `.len` is zero.
pub fn new_empty(data_type: DataType) -> Self {
Self::new(
Expand Down
15 changes: 15 additions & 0 deletions src/array/binary/mutable.rs
Original file line number Diff line number Diff line change
Expand Up @@ -345,6 +345,21 @@ impl<O: Offset> MutableBinaryArray<O> {
unsafe { self.extend_trusted_len_values_unchecked(iterator) }
}

/// Extends the [`MutableBinaryArray`] with every value yielded by `iterator`.
/// Unlike `extend_trusted_len`, which accepts an iterator of optional values,
/// all items appended here are non-null.
#[inline]
pub fn extend_values<I, P>(&mut self, iterator: I)
where
    P: AsRef<[u8]>,
    I: Iterator<Item = P>,
{
    // Presumably the number of items that were appended.
    let appended = extend_from_values_iter(&mut self.offsets, &mut self.values, iterator);

    // When a validity bitmap is tracked, mark each new slot as set.
    if let Some(bitmap) = &mut self.validity {
        bitmap.extend_constant(appended, true);
    }
}

/// Extends the [`MutableBinaryArray`] from an `iterator` of values of trusted length.
/// This differs from `extend_trusted_len_unchecked` which accepts iterator of optional
/// values.
Expand Down
7 changes: 0 additions & 7 deletions src/io/parquet/write/binary/basic.rs
Original file line number Diff line number Diff line change
Expand Up @@ -138,13 +138,6 @@ pub(crate) fn encode_delta<O: Offset>(

delta_bitpacked::encode(lengths, buffer);
} else {
println!(
"{:?}",
offsets
.windows(2)
.map(|w| (w[1] - w[0]).to_usize() as i64)
.collect::<Vec<_>>()
);
let lengths = offsets.windows(2).map(|w| (w[1] - w[0]).to_usize() as i64);
delta_bitpacked::encode(lengths, buffer);
}
Expand Down
59 changes: 59 additions & 0 deletions tests/it/array/binary/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ use arrow2::{
};

mod mutable;
mod to_mutable;

#[test]
fn basics() {
Expand Down Expand Up @@ -152,3 +153,61 @@ fn debug() {

assert_eq!(format!("{:?}", array), "BinaryArray[[1, 2], [], None]");
}

#[test]
fn into_mut_1() {
    let offsets = Buffer::from(vec![0, 1]);
    let values = Buffer::from(b"a".to_vec());
    // A second live handle to the values buffer, kept until the end of the test.
    let values_handle = values.clone();
    assert_eq!(values_handle, values);

    let array = BinaryArray::<i32>::from_data(DataType::Binary, offsets, values, None);
    let converted = array.into_mut();
    assert!(converted.is_left());
}

#[test]
fn into_mut_2() {
    let offsets = Buffer::from(vec![0, 1]);
    let values = Buffer::from(b"a".to_vec());
    // A second live handle to the offsets buffer, kept until the end of the test.
    let offsets_handle = offsets.clone();
    assert_eq!(offsets_handle, offsets);

    let array = BinaryArray::<i32>::from_data(DataType::Binary, offsets, values, None);
    let converted = array.into_mut();
    assert!(converted.is_left());
}

#[test]
fn into_mut_3() {
    let offsets = Buffer::from(vec![0, 1]);
    let values = Buffer::from(b"a".to_vec());
    let validity = Some([true].into());
    // A second live handle to the validity, kept until the end of the test.
    let validity_handle = validity.clone();
    assert_eq!(validity_handle, validity);

    let array = BinaryArray::<i32>::new(DataType::Binary, offsets, values, validity);
    let converted = array.into_mut();
    assert!(converted.is_left());
}

#[test]
fn into_mut_4() {
    // No extra handle to any part is kept here, so the conversion is expected to succeed.
    let validity = Some([true].into());
    let values = Buffer::from(b"a".to_vec());
    let offsets = Buffer::from(vec![0, 1]);

    let array = BinaryArray::<i32>::new(DataType::Binary, offsets, values, validity);
    let converted = array.into_mut();
    assert!(converted.is_right());
}

#[test]
fn rev_iter() {
    let array = BinaryArray::<i32>::from(&[Some("hello".as_bytes()), Some(" ".as_bytes()), None]);

    // Walking from the back must yield the items in reverse order.
    let reversed = array.into_iter().rev().collect::<Vec<_>>();
    let expected = vec![None, Some(" ".as_bytes()), Some("hello".as_bytes())];
    assert_eq!(reversed, expected);
}

#[test]
fn iter_nth() {
    let array = BinaryArray::<i32>::from(&[Some("hello"), Some(" "), None]);

    // In range: the item at position 1.
    let second = array.iter().nth(1);
    assert_eq!(second, Some(Some(" ".as_bytes())));

    // Out of range: nothing is yielded.
    let past_end = array.iter().nth(10);
    assert_eq!(past_end, None);
}
67 changes: 67 additions & 0 deletions tests/it/array/binary/to_mutable.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
use arrow2::{array::BinaryArray, bitmap::Bitmap, buffer::Buffer, datatypes::DataType};

#[test]
fn not_shared() {
    // The array holds the only handle to each of its parts.
    let array = BinaryArray::<i32>::from(&[Some("hello"), Some(" "), None]);
    let converted = array.into_mut();
    assert!(converted.is_right());
}

#[test]
// The clone is deliberate: `validity` must stay alive so the bitmap is shared.
#[allow(clippy::redundant_clone)]
fn shared_validity() {
    let validity = Bitmap::from([true]);
    let offsets: Buffer<i32> = vec![0, 1].into();
    let values: Buffer<u8> = b"a".to_vec().into();

    let array = BinaryArray::<i32>::new(DataType::Binary, offsets, values, Some(validity.clone()));
    assert!(array.into_mut().is_left());
}

#[test]
// The clone is deliberate: `values` must stay alive so the buffer is shared.
#[allow(clippy::redundant_clone)]
fn shared_values() {
    let values: Buffer<u8> = b"a".to_vec().into();
    let offsets: Buffer<i32> = vec![0, 1].into();
    let validity = Some(Bitmap::from([true]));

    let array = BinaryArray::<i32>::new(DataType::Binary, offsets, values.clone(), validity);
    assert!(array.into_mut().is_left());
}

#[test]
// The clones are deliberate: both originals must stay alive so the buffers are shared.
#[allow(clippy::redundant_clone)]
fn shared_offsets_values() {
    let values: Buffer<u8> = b"a".to_vec().into();
    let offsets: Buffer<i32> = vec![0, 1].into();
    let validity = Some(Bitmap::from([true]));

    let array =
        BinaryArray::<i32>::new(DataType::Binary, offsets.clone(), values.clone(), validity);
    assert!(array.into_mut().is_left());
}

#[test]
// The clone is deliberate: `offsets` must stay alive so the buffer is shared.
#[allow(clippy::redundant_clone)]
fn shared_offsets() {
    let offsets: Buffer<i32> = vec![0, 1].into();
    let values: Buffer<u8> = b"a".to_vec().into();
    let validity = Some(Bitmap::from([true]));

    let array = BinaryArray::<i32>::new(DataType::Binary, offsets.clone(), values, validity);
    assert!(array.into_mut().is_left());
}

#[test]
// The clone is deliberate: `array` must stay alive so every part is shared.
#[allow(clippy::redundant_clone)]
fn shared_all() {
    let array = BinaryArray::<i32>::from(&[Some("hello"), Some(" "), None]);
    let copy = array.clone();
    assert!(copy.into_mut().is_left());
}

0 comments on commit 22f5568

Please sign in to comment.