Skip to content
This repository has been archived by the owner on Feb 18, 2024. It is now read-only.

arrow2 cannot read ipc files compressed by official's arrow crate #1605

Open
Jonarod opened this issue Jan 16, 2024 · 0 comments
Open

arrow2 cannot read ipc files compressed by official's arrow crate #1605

Jonarod opened this issue Jan 16, 2024 · 0 comments

Comments

@Jonarod
Copy link

Jonarod commented Jan 16, 2024

I am not quite sure why, but compressed IPC files created by arrow are not readable by arrow2.

Please see reproduction:

[dependencies]
arrow2 = { version = "0.18.0", features = ["io_ipc", "io_ipc_compression"]}
arrow-schema = "50.0.0"
arrow-array = "50.0.0"
arrow-ipc = { version = "50.0.0", features = ["lz4", "zstd"]}
use std::{
  sync::Arc,
  fs::File
};


/// Opens `filepath` with the `arrow_ipc` file reader and reports on stdout
/// whether the first record batch could be read.
///
/// Nothing is printed when the reader yields no batch at all.
///
/// # Panics
///
/// Panics if the file cannot be opened or the reader cannot be constructed.
fn check_using_arrow(filepath: &str) {
  let handle = File::open(filepath).unwrap();
  let mut batches = arrow_ipc::reader::FileReader::try_new(&handle, None).unwrap();
  // Only the first item produced by the reader is inspected.
  match batches.next() {
    Some(Ok(_)) => println!("✅ {:?} Recognized by Arrow", filepath),
    Some(Err(e)) => println!("❌ {:?} Not recognized by Arrow: {:?}", filepath, e),
    None => {}
  }
}

/// Opens `filepath` with the `arrow2` IPC file reader and reports on stdout
/// whether the first chunk could be read.
///
/// Nothing is printed when the reader yields no chunk at all.
///
/// # Panics
///
/// Panics if the file cannot be opened or its IPC metadata cannot be read.
fn check_using_arrow2(filepath: &str) {
  // A single handle serves both steps: the metadata is read through a mutable
  // borrow, then the handle itself is moved into the reader. This avoids
  // opening the file twice and cloning the freshly returned metadata.
  let mut file = File::open(filepath).unwrap();
  let metadata = arrow2::io::ipc::read::read_file_metadata(&mut file).unwrap();
  let mut reader = arrow2::io::ipc::read::FileReader::new(file, metadata, None, None);
  // Only the first item produced by the reader is inspected.
  match reader.next() {
    Some(Ok(_)) => println!("✅ {:?} Recognized by Arrow2", filepath),
    Some(Err(e)) => println!("❌ {:?} Not recognized by Arrow2: {:?}", filepath, e),
    None => {}
  }
}


/// Writes a one-row, one-column (`"col"`, `Float64`) IPC file at path `file`
/// using the `arrow_ipc` writer, with the given optional `compression`.
///
/// # Panics
///
/// Panics if the batch, the output file, the write options or the writer
/// cannot be created, or if writing or finishing the file fails.
fn create_new_file_using_arrow(file: &str, compression: Option<arrow_ipc::CompressionType>) {
  let column: arrow_array::ArrayRef = Arc::new(arrow_array::Float64Array::from(vec![1.0]));
  let batch = arrow_array::RecordBatch::try_from_iter(vec![("col", column)]).unwrap();

  let mut sink = File::create(file).unwrap();

  // Build the write options in two steps: base settings first, then compression.
  let base_options =
    arrow_ipc::writer::IpcWriteOptions::try_new(8, false, arrow_ipc::MetadataVersion::V5).unwrap();
  let options = base_options.try_with_compression(compression).unwrap();

  let schema = batch.schema();
  let mut writer =
    arrow_ipc::writer::FileWriter::try_new_with_options(&mut sink, &schema, options).unwrap();
  writer.write(&batch).unwrap();
  writer.finish().unwrap();
}

/// Writes a one-row, one-column (`"col"`, non-nullable `Float64`) IPC file at
/// path `file` using the `arrow2` writer, with the given optional `compression`.
///
/// # Panics
///
/// Panics if the output file or the writer cannot be created, or if writing
/// or finishing the file fails.
fn create_new_file_using_arrow2(file: &str, compression: Option<arrow2::io::ipc::write::Compression>) {
  let column_field = arrow2::datatypes::Field::new(
    String::from("col"),
    arrow2::datatypes::DataType::Float64,
    false,
  );
  let schema = arrow2::datatypes::Schema::from(vec![column_field]);

  let mut sink = File::create(file).unwrap();
  let options = arrow2::io::ipc::write::WriteOptions { compression };

  // The single column is boxed as a trait object so it can be placed in a chunk.
  let values: Box<dyn arrow2::array::Array> =
    Box::new(arrow2::array::PrimitiveArray::<f64>::from_vec(vec![1.0]));
  let chunk = arrow2::chunk::Chunk::new(vec![values]);

  let mut writer =
    arrow2::io::ipc::write::FileWriter::try_new(&mut sink, schema, None, options).unwrap();
  writer.write(&chunk, None).unwrap();
  writer.finish().unwrap();
}

/// Writes three IPC files with the `arrow_ipc` writer (uncompressed, ZSTD and
/// LZ4 frame) and, for each one, checks it with both the `arrow_ipc` reader
/// and the `arrow2` reader.
fn main() {
  // (output path, compression passed to the arrow writer), in run order.
  let cases = [
    ("./created_by_arrow_without_compression.ipc", None),
    ("./created_by_arrow_with_zstd.ipc", Some(arrow_ipc::CompressionType::ZSTD)),
    ("./created_by_arrow_with_lz4.ipc", Some(arrow_ipc::CompressionType::LZ4_FRAME)),
  ];

  for (path, compression) in cases {
    create_new_file_using_arrow(path, compression);
    check_using_arrow(path);
    check_using_arrow2(path);
  }
}

Running this yields:

✅ "./created_by_arrow_without_compression.ipc" Recognized by Arrow
✅ "./created_by_arrow_without_compression.ipc" Recognized by Arrow2
✅ "./created_by_arrow_with_zstd.ipc" Recognized by Arrow
❌ "./created_by_arrow_with_zstd.ipc" Not recognized by Arrow2: Io(Custom { kind: Other, error: "Unknown frame descriptor" })
✅ "./created_by_arrow_with_lz4.ipc" Recognized by Arrow
❌ "./created_by_arrow_with_lz4.ipc" Not recognized by Arrow2: Io(Custom { kind: Other, error: LZ4Error("ERROR_frameType_unknown") })

As you can see, whenever an IPC file is compressed by arrow, arrow2 fails to read it.

Sign up for free to subscribe to this conversation on GitHub. Already have an account? Sign in.
Labels
None yet
Projects
None yet
Development

No branches or pull requests

1 participant