From 64e0864b47bab0e14e5ded97056141ada6743bba Mon Sep 17 00:00:00 2001 From: LTLA Date: Sun, 24 Nov 2024 15:06:46 -0800 Subject: [PATCH 1/3] Switch to the new tatami C++ matrix interface. --- DESCRIPTION | 10 ++- NAMESPACE | 1 + R/BSseq-class.R | 2 +- src/BSseq.h | 2 +- src/check_M_and_Cov.cpp | 135 ++++++++++++++++++---------------------- src/init.cpp | 2 +- 6 files changed, 72 insertions(+), 80 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 4b3dff0..5b00bd5 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -38,7 +38,8 @@ Imports: Biostrings, utils, HDF5Array (>= 1.19.11), - rhdf5 + rhdf5, + beachmat Suggests: testthat, bsseqData, @@ -49,7 +50,6 @@ Suggests: doParallel, rtracklayer, BSgenome.Hsapiens.UCSC.hg38, - beachmat (>= 1.5.2), batchtools Collate: utils.R @@ -82,6 +82,10 @@ VignetteBuilder: knitr URL: https://github.com/kasperdanielhansen/bsseq BugReports: https://github.com/kasperdanielhansen/bsseq/issues biocViews: DNAMethylation -LinkingTo: Rcpp, beachmat +LinkingTo: + Rcpp, + beachmat, + assorthead +SystemRequirements: C++17 NeedsCompilation: yes RoxygenNote: 7.1.0 diff --git a/NAMESPACE b/NAMESPACE index 0145631..aaea16e 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -38,6 +38,7 @@ importMethodsFrom(GenomeInfoDb, "seqlengths", "seqlengths<-", "seqinfo", "seqlevels<-", "sortSeqlevels") importFrom(GenomeInfoDb, "Seqinfo") importFrom(gtools, "combinations") +importFrom(beachmat, initializeCpp) importFrom(Rcpp, sourceCpp) # NOTE: data.table has some NAMESPACE clashes with functions in Bioconductor, diff --git a/R/BSseq-class.R b/R/BSseq-class.R index ad85723..dad56f3 100644 --- a/R/BSseq-class.R +++ b/R/BSseq-class.R @@ -26,7 +26,7 @@ .checkMandCov <- function(M, Cov) { msg <- NULL - validMsg(msg, .Call(cxx_check_M_and_Cov, M, Cov)) + validMsg(msg, .Call(cxx_check_M_and_Cov, initializeCpp(M), initializeCpp(Cov), 1)) } # TODO: Benchmark validity method diff --git a/src/BSseq.h b/src/BSseq.h index 8d9e31f..9c38f68 100644 --- a/src/BSseq.h +++ b/src/BSseq.h @@ -8,7 +8,7 @@ extern "C" { // Validity checking. - SEXP check_M_and_Cov(SEXP, SEXP); + SEXP check_M_and_Cov(SEXP, SEXP, SEXP); } #endif diff --git a/src/check_M_and_Cov.cpp b/src/check_M_and_Cov.cpp index b05994a..1160388 100644 --- a/src/check_M_and_Cov.cpp +++ b/src/check_M_and_Cov.cpp @@ -1,107 +1,94 @@ #include "BSseq.h" -#include "beachmat/integer_matrix.h" -#include "beachmat/numeric_matrix.h" +#include "Rtatami.h" #include "utils.h" +#include +#include + // NOTE: Returning Rcpp::CharacterVector rather than throwing an error because // this function is used within a validity method. -template -Rcpp::RObject check_M_and_Cov_internal(M_class M_bm, Cov_class Cov_bm) { +SEXP check_M_and_Cov(SEXP M, SEXP Cov, SEXP nt) { BEGIN_RCPP + Rtatami::BoundNumericPointer M_bound(M); + const auto& M_bm = *(M_bound->ptr); + Rtatami::BoundNumericPointer Cov_bound(M); + const auto& Cov_bm = *(Cov_bound->ptr); + // Get the dimensions of 'M' and 'Cov' and check these are compatible. - const size_t M_nrow = M_bm->get_nrow(); - const size_t Cov_nrow = Cov_bm->get_nrow(); + const int M_nrow = M_bm.nrow(); + const int Cov_nrow = Cov_bm.nrow(); if (M_nrow != Cov_nrow) { return Rcpp::CharacterVector( "'M' and 'Cov' must have the same number of rows."); } - const size_t M_ncol = M_bm->get_ncol(); - const size_t Cov_ncol = Cov_bm->get_ncol(); + const int M_ncol = M_bm.ncol(); + const int Cov_ncol = Cov_bm.ncol(); if (M_ncol != Cov_ncol) { return Rcpp::CharacterVector( "'M' and 'Cov' must have the same number of columns."); } + Rcpp::IntegerVector raw_nt(nt); + if (raw_nt.size() != 1 || raw_nt[0] <= 0) { + return Rcpp::CharacterVector( + "Number of threads should be a positive integer."); + } + int nthreads = raw_nt[0]; + // Simultaneously loop over columns of 'M' and 'Cov', checking that // `all(0 <= M <= Cov) && !anyNA(M) && !anyNA(Cov)` && all(is.finite(Cov)). - M_column_class M_column(M_nrow); - Cov_column_class Cov_column(Cov_nrow); - for (size_t j = 0; j < M_ncol; ++j) { - // Copy the j-th column of M to M_column and the j-th column of Cov to - // Cov_column - M_bm->get_col(j, M_column.begin()); - Cov_bm->get_col(j, Cov_column.begin()); - // Construct iterators - // NOTE: Iterators constructed outside of loop because they may be of - // different type, which is not supported within a for loop - // constructor. - auto M_column_it = M_column.begin(); - auto Cov_column_it = Cov_column.begin(); - for (M_column_it = M_column.begin(), Cov_column_it = Cov_column.begin(); - M_column_it != M_column.end(); - ++M_column_it, ++Cov_column_it) { - if (isNA(*M_column_it)) { - return Rcpp::CharacterVector("'M' must not contain NAs."); - } - if (isNA(*Cov_column_it)) { - return Rcpp::CharacterVector("'Cov' must not contain NAs."); - } - if (*M_column_it < 0) { - return Rcpp::CharacterVector( - "'M' must not contain negative values."); - } - if (*M_column_it > *Cov_column_it) { - return Rcpp::CharacterVector( - "All values of 'M' must be less than or equal to the corresponding value of 'Cov'."); - } - if (!R_FINITE(*Cov_column_it)) { - return Rcpp::CharacterVector("All values of 'Cov' must be finite."); + std::vector errors(nthreads); + tatami::parallelize([&](int tid, int start, int length) { + std::vector M_buffer(M_nrow), Cov_buffer(Cov_nrow); + auto M_ext = tatami::consecutive_extractor(&M_bm, false, start, length); + auto Cov_ext = tatami::consecutive_extractor(&Cov_bm, false, start, length); + + for (int c = start, cend = start + length; c < cend; ++c) { + auto M_ptr = M_ext->fetch(M_buffer.data()); + auto Cov_ptr = Cov_ext->fetch(Cov_buffer.data()); + + for (int r = 0; r < M_nrow; ++r) { + auto M_current = M_ptr[r]; + auto Cov_current = Cov_ptr[r]; + + if (isNA(M_current)) { + errors[tid] = "'M' must not contain NAs."; + return; + } + if (isNA(Cov_current)) { + errors[tid] = "'Cov' must not contain NAs."; + return; + } + if (M_current < 0) { + errors[tid] = "'M' must not contain negative values."; + return; + } + if (M_current > Cov_current) { + errors[tid] = "All values of 'M' must be less than or equal to the corresponding value of 'Cov'."; + return; + } + if (!R_FINITE(Cov_current)) { + errors[tid] = "All values of 'Cov' must be finite."; + return; + } } } + }, M_ncol, nthreads); + + for (const auto& msg : errors) { + if (!msg.empty()) { + return Rcpp::CharacterVector(msg.c_str()); + } } return R_NilValue; END_RCPP } -SEXP check_M_and_Cov(SEXP M, SEXP Cov) { - BEGIN_RCPP - - // Get the type of 'M' and 'Cov', - int M_type = beachmat::find_sexp_type(M); - int Cov_type = beachmat::find_sexp_type(Cov); - if (M_type == INTSXP && Cov_type == INTSXP) { - auto M_bm = beachmat::create_integer_matrix(M); - auto Cov_bm = beachmat::create_integer_matrix(Cov); - return check_M_and_Cov_internal< - Rcpp::IntegerVector, Rcpp::IntegerVector>(M_bm.get(), Cov_bm.get()); - } else if (M_type == REALSXP && Cov_type == REALSXP) { - auto M_bm = beachmat::create_numeric_matrix(M); - auto Cov_bm = beachmat::create_numeric_matrix(Cov); - return check_M_and_Cov_internal< - Rcpp::NumericVector, Rcpp::NumericVector>(M_bm.get(), Cov_bm.get()); - } else if (M_type == INTSXP && Cov_type == REALSXP) { - auto M_bm = beachmat::create_integer_matrix(M); - auto Cov_bm = beachmat::create_numeric_matrix(Cov); - return check_M_and_Cov_internal< - Rcpp::IntegerVector, Rcpp::NumericVector>(M_bm.get(), Cov_bm.get()); - } else if (M_type == REALSXP && Cov_type == INTSXP) { - auto M_bm = beachmat::create_numeric_matrix(M); - auto Cov_bm = beachmat::create_integer_matrix(Cov); - return check_M_and_Cov_internal< - Rcpp::NumericVector, Rcpp::IntegerVector>(M_bm.get(), Cov_bm.get()); - } - else { - return Rcpp::CharacterVector( - "'M' and 'Cov' must contain integer or numeric values."); - } - END_RCPP -} // TODOs ----------------------------------------------------------------------- // TODO: Add code path to process ordinary R vectors (for use within diff --git a/src/init.cpp b/src/init.cpp index 75a91cb..2c2deb7 100644 --- a/src/init.cpp +++ b/src/init.cpp @@ -9,7 +9,7 @@ extern "C" { static const R_CallMethodDef all_call_entries[] = { // Validity checking. - REGISTER(check_M_and_Cov, 2), + REGISTER(check_M_and_Cov, 3), {NULL, NULL, 0} }; From af52ad4ad6ca44d8bcfddca70ded78bea7f78425 Mon Sep 17 00:00:00 2001 From: LTLA Date: Sun, 24 Nov 2024 17:44:16 -0800 Subject: [PATCH 2/3] Minor fixes to pass CHECK. --- DESCRIPTION | 2 +- src/check_M_and_Cov.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 5b00bd5..1ce45f5 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -39,7 +39,7 @@ Imports: utils, HDF5Array (>= 1.19.11), rhdf5, - beachmat + beachmat (>= 2.23.2) Suggests: testthat, bsseqData, diff --git a/src/check_M_and_Cov.cpp b/src/check_M_and_Cov.cpp index 1160388..8442c47 100644 --- a/src/check_M_and_Cov.cpp +++ b/src/check_M_and_Cov.cpp @@ -15,7 +15,7 @@ SEXP check_M_and_Cov(SEXP M, SEXP Cov, SEXP nt) { Rtatami::BoundNumericPointer M_bound(M); const auto& M_bm = *(M_bound->ptr); - Rtatami::BoundNumericPointer Cov_bound(M); + Rtatami::BoundNumericPointer Cov_bound(Cov); const auto& Cov_bm = *(Cov_bound->ptr); // Get the dimensions of 'M' and 'Cov' and check these are compatible. From b5d6f31780123bc0903934d28d4ad3639276b510 Mon Sep 17 00:00:00 2001 From: LTLA Date: Tue, 26 Nov 2024 20:13:23 -0800 Subject: [PATCH 3/3] Bumped the required version of assorthead. --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 1ce45f5..8d2f7cd 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -85,7 +85,7 @@ biocViews: DNAMethylation LinkingTo: Rcpp, beachmat, - assorthead + assorthead (>= 1.1.4) SystemRequirements: C++17 NeedsCompilation: yes RoxygenNote: 7.1.0