Skip to content

Commit

Permalink
Merge pull request #2 from CreRecombinase/chunkreader
Browse files Browse the repository at this point in the history
Chunkreader
  • Loading branch information
CreRecombinase authored Oct 28, 2019
2 parents 8cfd973 + 8287429 commit a43ac52
Show file tree
Hide file tree
Showing 463 changed files with 128,947 additions and 8,388 deletions.
3 changes: 2 additions & 1 deletion .Rbuildignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,5 @@
.dir-locals.el
.projectile
compile_commands.json
.cquery
.cquery
.ccls
1 change: 0 additions & 1 deletion .dir-locals.el

This file was deleted.

4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,7 @@ GPATH
GTAGS
.cquery*
GRTAGS
/src/Makevars
/.lintr
*.idx
/compile_commands.json
10 changes: 5 additions & 5 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,18 +1,18 @@
Package: EigenH5
Type: Package
Title: Read and write R objects using HDF5
Version: 1.2
Version: 1.3
Date: 2017-08-03
Author: Nicholas Knoblauch
Maintainer: Nicholas Knoblauch <[email protected]>
Description: Read and write R objects using zstd+HDF5. Also contains a C++ interface for use with other packages. The C++ interface can handle the reading/writing/converting between column-major matrices (as in R/fortran) and row-major matrices (as in HDF5).
License: What license is it under?
biocViews:
Imports: Rcpp (>= 0.12.12), RcppEigen (>= 0.3.3.3.0),BH,dplyr,progress,tidyr,purrr,magrittr,zstdr,R6,fs,zstdr
LinkingTo: Rcpp, RcppEigen,RcppProgress,testthat,Rhdf5lib,rzstdlib,BH
SystemRequirements: C++11, HDF5 (>= 1.8.13), blosc
Imports: Rcpp (>= 0.12.12), RcppEigen (>= 0.3.3.3.0),BH,dplyr,progress,tidyr,stringr,purrr,magrittr,fs,readr,rlang
LinkingTo: Rcpp, RcppEigen,RcppProgress,testthat,BH,xtensor
SystemRequirements: C++17,libhdf5,libzstd
Suggests: testthat,
knitr,
rmarkdown
VignetteBuilder: knitr
RoxygenNote: 6.1.1
RoxygenNote: 6.1.99.9001
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,5 @@ useDynLib(EigenH5,.registration=TRUE)
importFrom(Rcpp, evalCpp)
exportPattern("^[[:alpha:]]+")
importFrom(magrittr,"%>%")
importFrom(rlang,"%||%")
importFrom(rlang,"!!")
48 changes: 48 additions & 0 deletions R/RcppExports.R
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,26 @@ get_group <- function(f, object_name) {
.Call(`_EigenH5_get_group`, f, object_name)
}

# Thin R wrapper around the native routine registered as
# `_EigenH5_read_matrix_rl`: forwards `filename`, `datapath`, `rows` and `cols`
# unchanged through .Call() and returns whatever the native code returns.
read_matrix_rl <- function(filename, datapath, rows, cols) {
.Call(`_EigenH5_read_matrix_rl`, filename, datapath, rows, cols)
}

# Thin R wrapper around the native routine registered as
# `_EigenH5_read_matrix_v`: forwards `filename`, `datapath`, `rows` and `cols`
# unchanged through .Call() and returns whatever the native code returns.
read_matrix_v <- function(filename, datapath, rows, cols) {
.Call(`_EigenH5_read_matrix_v`, filename, datapath, rows, cols)
}

# Thin R wrapper around the native routine registered as
# `_EigenH5_read_vector_v`: forwards `filename`, `datapath` and `rows`
# unchanged through .Call() and returns whatever the native code returns.
read_vector_v <- function(filename, datapath, rows) {
.Call(`_EigenH5_read_vector_v`, filename, datapath, rows)
}

# Thin R wrapper around the native routine registered as
# `_EigenH5_update_matrix_v`: forwards `data`, `filename`, `datapath`, `rows`
# and `cols` unchanged through .Call(). The native result is returned
# invisibly, so nothing is printed when called at top level.
update_matrix_v <- function(data, filename, datapath, rows, cols) {
invisible(.Call(`_EigenH5_update_matrix_v`, data, filename, datapath, rows, cols))
}

# Thin R wrapper around the native routine registered as
# `_EigenH5_update_vector_v`: forwards `data`, `filename`, `datapath` and
# `rows` unchanged through .Call(). The native result is returned invisibly,
# so nothing is printed when called at top level.
update_vector_v <- function(data, filename, datapath, rows) {
invisible(.Call(`_EigenH5_update_vector_v`, data, filename, datapath, rows))
}

# Thin R wrapper around the native routine registered as
# `_EigenH5_openFileHandleRead`: passes `filepath` through .Call() and returns
# the native result.
# NOTE(review): presumably the returned object is a file handle that must later
# be released (a closeFileHandle() wrapper exists in this file) - confirm.
openFileHandleRead <- function(filepath) {
.Call(`_EigenH5_openFileHandleRead`, filepath)
}
Expand All @@ -41,6 +61,14 @@ closeFileHandle <- function(fh) {
.Call(`_EigenH5_closeFileHandle`, fh)
}

# Thin R wrapper around the native routine registered as `_EigenH5_has_blosc`.
# Takes no arguments and returns the native result.
# NOTE(review): presumably reports whether blosc support is available - confirm
# against the C++ source.
has_blosc <- function() {
.Call(`_EigenH5_has_blosc`)
}

# Thin R wrapper around the native routine registered as `_EigenH5_has_lzf`.
# Takes no arguments and returns the native result.
# NOTE(review): presumably reports whether lzf support is available - confirm
# against the C++ source.
has_lzf <- function() {
.Call(`_EigenH5_has_lzf`)
}

# Thin R wrapper around the native routine registered as `_EigenH5_start_blosc`.
# Takes no arguments; the native result is returned invisibly, so the call is
# used for its side effect rather than its value.
start_blosc <- function() {
invisible(.Call(`_EigenH5_start_blosc`))
}
Expand Down Expand Up @@ -77,6 +105,10 @@ write_attribute_h5 <- function(data, filename, datapath) {
.Call(`_EigenH5_write_attribute_h5`, data, filename, datapath)
}

# Thin R wrapper around the native routine registered as
# `_EigenH5_read_R_attribute_h5`: forwards `filename` and `datapath` unchanged
# through .Call() and returns whatever the native code returns.
read_R_attribute_h5 <- function(filename, datapath) {
.Call(`_EigenH5_read_R_attribute_h5`, filename, datapath)
}

# Thin R wrapper around the native routine registered as
# `_EigenH5_read_attribute_h5`: forwards `filename` and `datapath` unchanged
# through .Call() and returns whatever the native code returns.
read_attribute_h5 <- function(filename, datapath) {
.Call(`_EigenH5_read_attribute_h5`, filename, datapath)
}
Expand All @@ -85,6 +117,14 @@ create_dataset_h5 <- function(filename, datapath, data, options) {
.Call(`_EigenH5_create_dataset_h5`, filename, datapath, data, options)
}

# Thin R wrapper around the native routine registered as
# `_EigenH5_fast_str2int`: forwards `input`, `offset` and `na_val` through
# .Call() and returns the native result.
#   offset defaults to 0L; na_val defaults to NA_integer_.
# NOTE(review): presumably converts strings to integers starting at character
# `offset`, substituting `na_val` on failure - confirm against the C++ source.
fast_str2int <- function(input, offset = 0L, na_val = NA_integer_) {
.Call(`_EigenH5_fast_str2int`, input, offset, na_val)
}

# Thin R wrapper around the native routine registered as
# `_EigenH5_fast_str2ascii`: forwards `input` and `offset` (default 0L) through
# .Call() and returns the native result.
fast_str2ascii <- function(input, offset = 0L) {
.Call(`_EigenH5_fast_str2ascii`, input, offset)
}

# Thin R wrapper around the native routine registered as
# `_EigenH5_link_objects_h5`: forwards `filename_from`, `filename_to`,
# `datapath_from` and `datapath_to` unchanged through .Call(). The native
# result is returned invisibly.
link_objects_h5 <- function(filename_from, filename_to, datapath_from, datapath_to) {
invisible(.Call(`_EigenH5_link_objects_h5`, filename_from, filename_to, datapath_from, datapath_to))
}
Expand Down Expand Up @@ -121,6 +161,10 @@ isObject <- function(filename, dataname) {
.Call(`_EigenH5_isObject`, filename, dataname)
}

# Thin R wrapper around the native routine registered as
# `_EigenH5_ArrayTypeSize`: forwards `filename` and `dataname` unchanged
# through .Call() and returns whatever the native code returns.
ArrayTypeSize <- function(filename, dataname) {
.Call(`_EigenH5_ArrayTypeSize`, filename, dataname)
}

# Thin R wrapper around the native routine registered as `_EigenH5_isDataSet`:
# forwards `filename` and `dataname` unchanged through .Call() and returns the
# native result.
# NOTE(review): presumably TRUE when `dataname` names a dataset in the file -
# confirm against the C++ source.
isDataSet <- function(filename, dataname) {
.Call(`_EigenH5_isDataSet`, filename, dataname)
}
Expand All @@ -137,6 +181,10 @@ typeof_h5 <- function(filename, datapath) {
.Call(`_EigenH5_typeof_h5`, filename, datapath)
}

# Thin R wrapper around the native routine registered as `_EigenH5_info_h5`:
# forwards `filename` and `datapaths` unchanged through .Call() and returns
# whatever the native code returns.
info_h5 <- function(filename, datapaths) {
.Call(`_EigenH5_info_h5`, filename, datapaths)
}

# Thin R wrapper around the native routine registered as
# `_EigenH5_file_acc_ct`: passes `filename` through .Call() and returns the
# native result.
file_acc_ct <- function(filename) {
.Call(`_EigenH5_file_acc_ct`, filename)
}
Expand Down
71 changes: 71 additions & 0 deletions R/chunk_reader_writer.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
# Read a matrix from dataset `datapath` of the HDF5 file `filename` by
# delegating to read_matrix_v().
#
#   filename, datapath  forwarded unchanged.
#   i, j                optional selectors handed to read_matrix_v() as its
#                       third and fourth arguments; an omitted selector is
#                       replaced by an empty integer vector.
#                       NOTE(review): presumably "empty" means "read
#                       everything" on the native side - confirm.
#   ...                 must be empty; any extra argument aborts via
#                       stopifnot().
#
# Returns the value produced by read_matrix_v().
read_matrix_h5v <- function(filename, datapath, i, j, ...) {
  argl <- list(...)
  if (missing(i)) {
    i <- integer()
  }
  if (missing(j)) {
    j <- integer()
  }
  stopifnot(length(argl) == 0)
  read_matrix_v(filename, datapath, i, j)
}


# Read a vector from dataset `datapath` of the HDF5 file `filename` by
# delegating to read_vector_v().
#
#   filename, datapath  forwarded unchanged.
#   i                   optional selector handed to read_vector_v() as its
#                       third argument; when omitted an empty integer vector
#                       is passed instead.
#                       NOTE(review): presumably "empty" means "read
#                       everything" on the native side - confirm.
#   ...                 must be empty; any extra argument aborts via
#                       stopifnot().
#
# Returns the value produced by read_vector_v().
read_vector_h5v <- function(filename, datapath, i, ...) {
  argl <- list(...)
  if (missing(i)) {
    i <- integer()
  }
  stopifnot(length(argl) == 0)
  read_vector_v(filename, datapath, i)
}




# Write `data` into dataset `datapath` of the HDF5 file `filename`, creating
# the dataset first when it is not there yet.
#
#   data      object to write; NROW(data) / NCOL(data) give the default index
#             ranges.
#   filename  path of the HDF5 file; expanded with fs::path_expand().
#   datapath  location of the dataset inside the file.
#   ...       collected into a list and handed to create_dataset_h5() as its
#             `options`. Entries named `i` / `j`, when present, replace the
#             default row / column indices passed to update_matrix_v().
#
# Returns the value of the final update_matrix_v() call.
write_matrix_h5v <- function(data, filename, datapath, ...) {
  argl <- list(...)
  filename <- fs::path_expand(filename)

  # `||` short-circuits, so isObject() is only consulted for a file that
  # already exists; in either case the dataset is created exactly once.
  if (!file.exists(filename) || !isObject(filename, datapath)) {
    create_dataset_h5(
      filename = filename,
      datapath = datapath,
      data = data,
      options = argl
    )
  }

  # seq_len() instead of 1:n: for zero-extent data, 1:0 would produce the
  # bogus index vector c(1, 0) rather than an empty one.
  i <- argl[["i"]] %||% seq_len(NROW(data))
  j <- argl[["j"]] %||% seq_len(NCOL(data))
  update_matrix_v(data, filename = filename, datapath, i, j)
}



# Write `data` into dataset `datapath` of the HDF5 file `filename`, creating
# the dataset first when it is not there yet.
#
#   data      object to write; NROW(data) gives the default index range.
#   filename  path of the HDF5 file; expanded with fs::path_expand().
#   datapath  location of the dataset inside the file.
#   ...       collected into a list and handed to create_dataset_h5() as its
#             `options`. An entry named `i`, when present, replaces the
#             default indices passed to update_vector_v().
#
# Returns the value of the final update_vector_v() call.
write_vector_h5v <- function(data, filename, datapath, ...) {
  argl <- list(...)
  filename <- fs::path_expand(filename)

  # `||` short-circuits, so isObject() is only consulted for a file that
  # already exists; in either case the dataset is created exactly once.
  if (!file.exists(filename) || !isObject(filename, datapath)) {
    create_dataset_h5(
      filename = filename,
      datapath = datapath,
      data = data,
      options = argl
    )
  }

  # seq_len() instead of 1:n: for zero-length data, 1:0 would produce the
  # bogus index vector c(1, 0) rather than an empty one.
  i <- argl[["i"]] %||% seq_len(NROW(data))
  update_vector_v(data, filename = filename, datapath, i)
}





49 changes: 28 additions & 21 deletions R/read_write.R
Original file line number Diff line number Diff line change
@@ -1,29 +1,32 @@
read_df_h5 <- function(filename,datapath,...){
read_df_h5 <- function(filename, datapath = "/", ...){
filename <- fs::path_expand(filename)
stopifnot(file.exists(filename))
dsets <- ls_h5(filename,groupname = datapath)
argl <- list(...)
if(!hasArg(subcols)){
argl[["subcols"]] <- dsets
# subcols <- dsets
}
dsets <- dsets[dsets %in% argl[["subcols"]]]
if(datapath=="/"){
groupname <- ""
}
dsp <- normalizePath(paste(datapath,dsets,sep="/"), mustWork = F)
names(dsp) <- basename(dsp)
return(purrr::map_dfc(dsp,~read_vector_h5(filename, datapath = .x, ...), ...=...))
stopifnot(file.exists(filename))
dsets <- ls_h5(filename, groupname = datapath)
argl <- list(...)
if(!hasArg(subcols)){
argl[["subcols"]] <- dsets
# subcols <- dsets
}
dsets <- dsets[dsets %in% argl[["subcols"]]]
dsp <- fix_paths(datapath, dsets)
names(dsp) <- basename(dsp)
if(!hasArg(subset)){
return(purrr::map_dfc(dsp,~read_vector_h5v(filename = filename,datapath = .x)))
}else{
return(purrr::map_dfc(dsp,~read_vector_h5v(filename = filename,datapath = .x,i=as.integer(argl[["subset"]]))))
}
}



write_df_h5 <- function(df, filename, datapath="/", ...){
argl <- list(...)
filename <- fs::path_expand(filename)
if(datapath=="/"){
datapath <- ""
}
purrr::iwalk(df, ~purrr::invoke(write_vector_h5,
filename = filename,
datapath = normalizePath(paste(datapath, .y, sep="/"),mustWork = F),
datapath = fix_paths(datapath, .y),
data=.x,
argl))
}
Expand Down Expand Up @@ -57,16 +60,20 @@ write_vector_h5 <- function(data, filename, datapath, ...){
app_v <- TRUE
filename <- fs::path_expand(filename)
if(!file.exists(filename)){
if(isTRUE(argl[["append"]]) && is.null(argl[["max_dims"]])){
argl[["max_dims"]] <- NA_integer_
}
create_dataset_h5(filename, datapath, data, argl)
app_v <- FALSE
}
if(!isObject(filename, datapath)){
if(isTRUE(argl[["append"]]) && is.null(argl[["max_dims"]])){
argl[["max_dims"]] <- NA_integer_
}
create_dataset_h5(filename, datapath, data, argl)
}else{
if(hasArg(append)){
if(app_v & argl[["append"]]){
ret <- append_vector_h5(data = data,filename = filename, datapath = datapath, ... = argl)
}
if(app_v && isTRUE(argl[["append"]])){
ret <- append_vector_h5(data = data,filename = filename, datapath = datapath, ... = argl)
}
}
if(!ret){
Expand Down
Loading

0 comments on commit a43ac52

Please sign in to comment.