From a8ba492cf2570c425940e6e35b4811860fa81fe0 Mon Sep 17 00:00:00 2001 From: jpromeror <{ID}+{username}@users.noreply.github.com> Date: Fri, 8 Mar 2024 15:07:15 -0800 Subject: [PATCH 01/10] (IRWLS.R) speed up S_mat construction --- R/IRWLS.R | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/R/IRWLS.R b/R/IRWLS.R index efa325a..71278c6 100644 --- a/R/IRWLS.R +++ b/R/IRWLS.R @@ -34,13 +34,11 @@ solveIRWLS.weights <-function(S,B,nUMI, OLS=FALSE, constrain = TRUE, verbose = F #solution <- runif(length(solution))*2 / length(solution) # random initialization names(solution) <- colnames(S) - S_mat <<- matrix(0,nrow = dim(S)[1],ncol = dim(S)[2]*(dim(S)[2] + 1)/2) - counter = 1 - for(i in 1:dim(S)[2]) - for(j in i:dim(S)[2]) { - S_mat[,counter] <<- S[,i] * S[,j] # depends on n^2 - counter <- counter + 1 - } + numCols <- ncol(S) + Index <- which(upper.tri(matrix(0, ncol = numCols, nrow = numCols), diag = TRUE), arr.ind = TRUE) + Index <- Index[order(Index[, 1], Index[, 2]), ,drop=F] + S_mat <<- S[, Index[, 1]] * S[, Index[, 2]] + iterations<-0 #now use dampened WLS, iterate weights until convergence changes<-c() @@ -83,7 +81,7 @@ solveWLS<-function(S,B,initialSol, nUMI, bulk_mode = F, constrain = F){ threshold = max(1e-4, nUMI * 1e-7) prediction[prediction < threshold] <- threshold gene_list = rownames(S) - derivatives <- get_der_fast(S, B, gene_list, prediction, bulk_mode = bulk_mode) + derivatives <- get_der_fast(S, B, gene_list, prediction[,1], bulk_mode = bulk_mode) d_vec <- -derivatives$grad D_mat <- psd(derivatives$hess) norm_factor <- norm(D_mat,"2") From 8724454c0ca4df0752115d1b169feff387e7954c Mon Sep 17 00:00:00 2001 From: jpromeror <{ID}+{username}@users.noreply.github.com> Date: Fri, 8 Mar 2024 15:16:36 -0800 Subject: [PATCH 02/10] (platform_effect_normalization.R) Add choose sigma multicore function --- R/platform_effect_normalization.R | 127 ++++++++++++++++++++++++++++++ 1 file changed, 127 insertions(+) diff --git a/R/platform_effect_normalization.R b/R/platform_effect_normalization.R index 0b8244f..7a408d9 100644 --- a/R/platform_effect_normalization.R +++ b/R/platform_effect_normalization.R @@ -107,3 +107,130 @@ choose_sigma_c <- function(RCTD) { RCTD@internal_vars$X_vals <- X_vals return(RCTD) } + +#' Estimates sigma_c by maximum likelihood (multi-core) +#' +#' @param RCTD an \code{\linkS4class{RCTD}} object after running the \code{\link{fitBulk}} function. +#' @return Returns an \code{\linkS4class{RCTD}} with the estimated \code{sigma_c}. +#' @export +choose_sigma_mc<-function(RCTD) +{ + message('Step 2/4: Choose Sigma') + puck <- RCTD@spatialRNA + MIN_UMI <- RCTD@config$UMI_min_sigma + sigma <- 100 + + Q1 <- readRDS(system.file("extdata", "Qmat/Q_mat_1.rds", package = "spacexrHD")) + Q2 <- readRDS(system.file("extdata", "Qmat/Q_mat_2.rds", package = "spacexrHD")) + Q3 <- readRDS(system.file("extdata", "Qmat/Q_mat_3.rds", package = "spacexrHD")) + Q4 <- readRDS(system.file("extdata", "Qmat/Q_mat_4.rds", package = "spacexrHD")) + Q5 <- readRDS(system.file("extdata", "Qmat/Q_mat_5.rds", package = "spacexrHD")) + + Q_mat_all <- c(Q1, Q2, Q3, Q4, Q5) + sigma_vals <- names(Q_mat_all) + + X_vals <- readRDS(system.file("extdata", "Qmat/X_vals.rds", package = "spacexrHD")) + + #get initial classification + N_fit = min(RCTD@config$N_fit,sum(puck@nUMI > MIN_UMI)) + if(N_fit == 0) { + stop(paste('choose_sigma_c determined a N_fit of 0! This is probably due to unusually low UMI counts per bead in your dataset. Try decreasing the parameter UMI_min_sigma. It currently is',MIN_UMI,'but none of the beads had counts larger than that.')) + } + + fit_ind = sample(names(puck@nUMI[puck@nUMI > MIN_UMI]), N_fit) + beads = t(puck@counts[RCTD@internal_vars$gene_list_reg,fit_ind]) + + #message(paste('chooseSigma: using initial Q_mat with sigma = ',sigma/100)) + #print(paste0("N_epoch: ",RCTD@config$N_epoch)) + + nUMI <- puck@nUMI[fit_ind] + cell_type_means <- RCTD@cell_type_info$renorm[[1]] + gene_list <- RCTD@internal_vars$gene_list_reg + constrain <- FALSE + max_cores <- RCTD@config$max_cores + + if(max_cores > 1) + { + message(paste0("Multicore enabled using ", max_cores," cores")) + registerDoParallel(cores=max_cores) + } + + NN<-nrow(beads) + pb <- txtProgressBar(min = 0, max = RCTD@config$N_epoch, style = 3) + + for(iter in 1:RCTD@config$N_epoch) + { + set_likelihood_vars(Q_mat_all[[as.character(sigma)]], X_vals) + + if(max_cores>1) + { + + results<- foreach(i = 1:NN) %dopar% { + + #set_likelihood_vars(Q_mat_all[[as.character(sigma)]], X_vals) + weights <- solveIRWLS.weights(data.matrix(RCTD@cell_type_info$renorm[[1]][RCTD@internal_vars$gene_list_reg,]*nUMI[i]), + beads[i,], + nUMI[i], + OLS = FALSE, + constrain = FALSE, + verbose = FALSE, + n.iter = 50, + MIN_CHANGE = 0.001, + bulk_mode = FALSE) + + return(weights) + } + + + }else{ + + results<-vector("list",length=nrow(beads)) + + for(i in 1:nrow(beads)) + { + set_likelihood_vars(Q_mat_all[[as.character(sigma)]], X_vals) + weights <- solveIRWLS.weights(data.matrix(RCTD@cell_type_info$renorm[[1]][RCTD@internal_vars$gene_list_reg,]*nUMI[i]), + beads[i,], + nUMI[i], + OLS = FALSE, + constrain = FALSE, + verbose = FALSE, + n.iter = 50, + MIN_CHANGE = 0.001, + bulk_mode = FALSE) + results[[i]]<-weights + + } + + + } + + weights<- do.call(rbind,lapply(results,function(X){return(X$weights)})) + weights<-as(weights,"dgCMatrix") + rownames(weights) <- fit_ind + colnames(weights) <- RCTD@cell_type_info$renorm[[2]] + prediction <- sweep(as.matrix(RCTD@cell_type_info$renorm[[1]][RCTD@internal_vars$gene_list_reg,]) %*% t(as.matrix(weights)), 2, puck@nUMI[fit_ind], '*') + #message(paste('Likelihood value:',calc_log_l_vec(as.vector(prediction), as.vector(t(beads))))) + sigma_prev <- sigma + sigma <- chooseSigma(prediction, t(beads), Q_mat_all, X_vals, sigma) + + if(sigma == sigma_prev) + { + message(paste0(RCTD@config$N_epoch,"/",RCTD@config$N_epoch)) + break + } + + setTxtProgressBar(pb, iter) + } + + setTxtProgressBar(pb, iter) + + close(pb) + RCTD@internal_vars$sigma <- sigma/100 + RCTD@internal_vars$Q_mat <- Q_mat_all[[as.character(sigma)]] + RCTD@internal_vars$X_vals <- X_vals + + return(RCTD) + + +} From 0c361bb8b495bcafb277bfd171de25df342e0462 Mon Sep 17 00:00:00 2001 From: jpromeror <{ID}+{username}@users.noreply.github.com> Date: Fri, 8 Mar 2024 15:22:33 -0800 Subject: [PATCH 03/10] (postProcessing.R) Speed up gather_results --- R/postProcessing.R | 65 +++++++++++++++++++++++++--------------------- 1 file changed, 35 insertions(+), 30 deletions(-) diff --git a/R/postProcessing.R b/R/postProcessing.R index 6ea6d3e..42556ea 100644 --- a/R/postProcessing.R +++ b/R/postProcessing.R @@ -2,39 +2,44 @@ # Collects RCTD results gather_results <- function(RCTD, results) { - cell_type_names = RCTD@cell_type_info$renorm[[2]] + + message('Step 4/4: Gather Results') + pb <- txtProgressBar(max = 3, style = 3) + + cell_type_names <- RCTD@cell_type_info$renorm[[2]] barcodes <- colnames(RCTD@spatialRNA@counts) N <- length(results) - weights = Matrix(0, nrow = N, ncol = length(cell_type_names)) - weights_doublet = Matrix(0, nrow = N, ncol = 2) - rownames(weights) = barcodes; rownames(weights_doublet) = barcodes - colnames(weights) = cell_type_names; colnames(weights_doublet) = c('first_type', 'second_type') - empty_cell_types = factor(character(N),levels = cell_type_names) + + empty_cell_types <- factor(character(N),levels = cell_type_names) spot_levels <- c("reject", "singlet", "doublet_certain", "doublet_uncertain") - results_df <- data.frame(spot_class = factor(character(N),levels=spot_levels), - first_type = empty_cell_types, second_type = empty_cell_types, - first_class = logical(N), second_class = logical(N), - min_score = numeric(N), singlet_score = numeric(N), - conv_all = logical(N), conv_doublet = logical(N)) - score_mat <- list() - singlet_scores <- list() - for(i in 1:N) { - if(i %% 1000 == 0) - print(paste("gather_results: finished",i)) - weights_doublet[i,] = results[[i]]$doublet_weights - weights[i,] = results[[i]]$all_weights - results_df[i, "spot_class"] = results[[i]]$spot_class - results_df[i, "first_type"] = results[[i]]$first_type - results_df[i, "second_type"] = results[[i]]$second_type - results_df[i, "first_class"] = results[[i]]$first_class - results_df[i, "second_class"] = results[[i]]$second_class - results_df[i, "min_score"] = results[[i]]$min_score - results_df[i, "singlet_score"] = results[[i]]$singlet_score - results_df[i, "conv_all"] = results[[i]]$conv_all - results_df[i, "conv_doublet"] = results[[i]]$conv_doublet - score_mat[[i]] <- results[[i]]$score_mat - singlet_scores[[i]] <- results[[i]]$singlet_scores - } + + setTxtProgressBar(pb, 1) + + results_df <- data.frame(spot_class = factor(sapply(results,function(X){return(X$spot_class)}),levels=spot_levels), + first_type = sapply(results,function(X){return(X$first_type)}), + scond_type = sapply(results,function(X){return(X$second_type)}), + first_class = sapply(results,function(X){return(X$first_class)}), + second_class = sapply(results,function(X){return(X$second_class)}), + min_score = sapply(results,function(X){return(X$min_score)}), + singlet_score = sapply(results,function(X){return(X$singlet_score)}), + conv_all = sapply(results,function(X){return(X$conv_all)}), + conv_doublet = sapply(results,function(X){return(X$conv_doublet)})) + + setTxtProgressBar(pb, 2) + + weights_doublet <- do.call(rbind,lapply(results,function(X){return(X$doublet_weights)})) + weights <- do.call(rbind,lapply(results,function(X){return(X$all_weights)})) + + rownames(weights) <- barcodes + rownames(weights_doublet) <- barcodes + colnames(weights) <- cell_type_names + colnames(weights_doublet) <- c('first_type', 'second_type') + + score_mat <- lapply(results,function(X){return(X$score_mat)}) + singlet_scores <- lapply(results,function(X){return(X$singlet_scores)}) + + setTxtProgressBar(pb, 3) + rownames(results_df) = barcodes RCTD@results <- list(results_df = results_df, weights = weights, weights_doublet = weights_doublet, score_mat = score_mat, singlet_scores = singlet_scores) From e1ee0545413bb7746480c7fb6ab62f27cea48ac4 Mon Sep 17 00:00:00 2001 From: jpromeror <{ID}+{username}@users.noreply.github.com> Date: Fri, 8 Mar 2024 15:27:11 -0800 Subject: [PATCH 04/10] (prob_model.R) format code for better display --- R/prob_model.R | 36 ++++++++++++++++++++++++++---------- 1 file changed, 26 insertions(+), 10 deletions(-) diff --git a/R/prob_model.R b/R/prob_model.R index 15e4bb2..2bc91d7 100644 --- a/R/prob_model.R +++ b/R/prob_model.R @@ -127,21 +127,37 @@ calc_Q_all <- function(Y, lambda) { epsilon <- 1e-4; X_max <- max(X_vals); delta <- 1e-6 lambda <- pmin(pmax(epsilon, lambda),X_max - epsilon) - l <- floor((lambda/delta)^(1/2)) - m <- pmin(l - 9,40) + pmax(ceiling(sqrt(pmax(l-48.7499,0)*4))-2,0) - ti1 <- X_vals[m]; ti <- X_vals[m+1]; hi <- ti - ti1 - #Q0 <- cbind(Y+1, m); Q1 <- cbind(Y+1, m+1) - Q0 <- cbind(Y+1, m); Q1 <- Q0; Q1[,2] <- Q1[,2] + 1 - fti1 <- Q_mat[Q0]; fti <- Q_mat[Q1] - zi1 <- SQ_mat[Q0]; zi <- SQ_mat[Q1] - diff1 <- lambda - ti1; diff2 <- ti - lambda - diff3 <- fti/hi-zi*hi/6; diff4 <- fti1/hi-zi1*hi/6 - zdi <- zi / hi; zdi1 <- zi1 / hi + l <- floor(sqrt(lambda / delta)) + V1 <- pmin(40, l - 9) + V2 <- pmax(0, ceiling(sqrt(pmax(0, l - 48.7499) * 4)) - 2) + m <- V1 + V2 + + ti1 <- X_vals[m] + ti <- X_vals[m + 1] + hi <- ti - ti1 + + Q0 <- cbind(Y+1, m) + Q1 <- Q0 + Q1[,2] <- Q1[,2] + 1 + fti1 <- Q_mat[Q0] + fti <- Q_mat[Q1] + zi1 <- SQ_mat[Q0] + zi <- SQ_mat[Q1] + + diff1 <- lambda - ti1 + diff2 <- ti - lambda + diff3 <- fti / hi - zi * hi / 6 + diff4 <- fti1 / hi - zi1 * hi / 6 + zdi <- zi / hi + zdi1 <- zi1 / hi + # cubic spline interpolation d0_vec <- zdi*(diff1)^3/6 + zdi1*(diff2)^3/6 + diff3*diff1 + diff4*diff2 d1_vec <- zdi*(diff1)^2/2 - zdi1*(diff2)^2/2 + diff3 - diff4 d2_vec <- zdi*(diff1) + zdi1*(diff2) + return(list(d0_vec = d0_vec, d1_vec = d1_vec, d2_vec = d2_vec)) + } #negative log likelihood From 5fd8fca33194f9b51d3d681973c3f813f66f1149 Mon Sep 17 00:00:00 2001 From: jpromeror <{ID}+{username}@users.noreply.github.com> Date: Fri, 8 Mar 2024 15:35:52 -0800 Subject: [PATCH 05/10] (RCTD_helper.R) re-write and speed up process_bead_doublet --- R/RCTD_helper.R | 121 ++++++++++++++++++++++++++++++++++-------------- 1 file changed, 87 insertions(+), 34 deletions(-) diff --git a/R/RCTD_helper.R b/R/RCTD_helper.R index 2af6d33..2acc64d 100644 --- a/R/RCTD_helper.R +++ b/R/RCTD_helper.R @@ -64,80 +64,133 @@ check_pairs_type <- function(cell_type_profiles, bead, UMI_tot, score_mat, min_s } #Decomposing a single bead via doublet search -process_bead_doublet <- function(cell_type_info, gene_list, UMI_tot, bead, class_df = NULL, constrain = T, verbose = F, - MIN.CHANGE = 0.001, CONFIDENCE_THRESHOLD = 10, DOUBLET_THRESHOLD = 25) { +process_bead_doublet <-function(cell_type_info, gene_list, UMI_tot, bead, class_df = NULL, constrain = T, verbose = F, + MIN.CHANGE = 0.001, CONFIDENCE_THRESHOLD = 10, DOUBLET_THRESHOLD = 25, OLS = FALSE, + n.iter = 50, bulk_mode = FALSE) +{ + cell_type_profiles <- cell_type_info[[1]][gene_list,] cell_type_profiles = cell_type_profiles * UMI_tot cell_type_profiles = data.matrix(cell_type_profiles) - QL_score_cutoff = CONFIDENCE_THRESHOLD; doublet_like_cutoff = DOUBLET_THRESHOLD - results_all = decompose_full(cell_type_profiles, UMI_tot, bead, constrain = constrain, verbose = verbose, MIN_CHANGE = MIN.CHANGE) + QL_score_cutoff = CONFIDENCE_THRESHOLD + doublet_like_cutoff = DOUBLET_THRESHOLD + + results_all <- solveIRWLS.weights(cell_type_profiles,bead,UMI_tot,OLS = OLS, constrain = constrain, + verbose = verbose, n.iter = n.iter, MIN_CHANGE = MIN.CHANGE, bulk_mode = bulk_mode) + all_weights <- results_all$weights conv_all <- results_all$converged - initial_weight_thresh = 0.01; cell_type_names = cell_type_info[[2]] + initial_weight_thresh = 0.01 + cell_type_names = cell_type_info[[2]] candidates <- names(which(all_weights > initial_weight_thresh)) + if(length(candidates) == 0) + { candidates = cell_type_info[[2]][1:min(3,cell_type_info[[3]])] - if(length(candidates) == 1) + + }else if(length(candidates) == 1) + { if(candidates[1] == cell_type_info[[2]][1]) + { candidates = c(candidates, cell_type_info[[2]][2]) - else + + }else{ + candidates = c(candidates, cell_type_info[[2]][1]) + } + + } + score_mat = Matrix(0, nrow = length(candidates), ncol = length(candidates)) - rownames(score_mat) = candidates; colnames(score_mat) = candidates + rownames(score_mat) = candidates + colnames(score_mat) = candidates singlet_scores <- numeric(length(candidates)) names(singlet_scores) <- candidates - for(type in candidates) { + + for(type in candidates) + { + singlet_scores[type] <- get_singlet_score(cell_type_profiles, bead, UMI_tot, type, constrain, MIN.CHANGE = MIN.CHANGE) } + + min_score = 0 first_type = NULL; second_type = NULL - first_class = F; second_class = F #indicates whether the first (resp second) refers to a class rather than a type - for(i in 1:(length(candidates)-1)) { - type1 = candidates[i] - for(j in (i+1):length(candidates)) { - type2 = candidates[j] - score = decompose_sparse(cell_type_profiles, UMI_tot, bead, type1, type2, score_mode = T, constrain = constrain, verbose = verbose, MIN.CHANGE = MIN.CHANGE) - score_mat[i,j] = score; score_mat[j,i] = score - if(is.null(second_type) || score < min_score) { - first_type <- type1; second_type <- type2 - min_score = score + first_class = F + second_class = F #indicates whether the first (resp second) refers to a class rather than a type + + for(i in 1:(length(candidates)-1)) + { + type1 <- candidates[i] + + for(j in (i+1):length(candidates)) + { + type2 <- candidates[j] + score <- decompose_sparse(cell_type_profiles, UMI_tot, bead, type1, type2, score_mode = T, constrain = constrain, verbose = verbose, MIN.CHANGE = MIN.CHANGE) + score_mat[i,j] <- score + score_mat[j,i]<- score + + if(is.null(second_type) || score < min_score) + { + first_type <- type1 + second_type <- type2 + min_score <- score } } + } - type1_pres = check_pairs_type(cell_type_profiles, bead, UMI_tot, score_mat, min_score, first_type, class_df, QL_score_cutoff, constrain, singlet_scores, MIN.CHANGE = MIN.CHANGE) - type2_pres = check_pairs_type(cell_type_profiles, bead, UMI_tot, score_mat, min_score, second_type, class_df, QL_score_cutoff, constrain, singlet_scores, MIN.CHANGE = MIN.CHANGE) - if(!type1_pres$all_pairs_class && !type2_pres$all_pairs_class) { + + type1_pres <- check_pairs_type(cell_type_profiles, bead, UMI_tot, score_mat, min_score, first_type, class_df, QL_score_cutoff, constrain, singlet_scores, MIN.CHANGE = MIN.CHANGE) + type2_pres <- check_pairs_type(cell_type_profiles, bead, UMI_tot, score_mat, min_score, second_type, class_df, QL_score_cutoff, constrain, singlet_scores, MIN.CHANGE = MIN.CHANGE) + if(!type1_pres$all_pairs_class && !type2_pres$all_pairs_class) + { spot_class <- "reject" singlet_score = min_score + 2 * doublet_like_cutoff #arbitrary - } - else if(type1_pres$all_pairs_class && !type2_pres$all_pairs_class) { + + }else if(type1_pres$all_pairs_class && !type2_pres$all_pairs_class) + { first_class <- !type1_pres$all_pairs singlet_score = type1_pres$singlet_score spot_class = "doublet_uncertain" - } else if(!type1_pres$all_pairs_class && type2_pres$all_pairs_class) { + + }else if(!type1_pres$all_pairs_class && type2_pres$all_pairs_class) + { first_class <- !type2_pres$all_pairs singlet_score = type2_pres$singlet_score temp = first_type; first_type = second_type; second_type = temp spot_class = "doublet_uncertain" - } else { + }else{ spot_class = "doublet_certain" singlet_score = min(type1_pres$singlet_score, type2_pres$singlet_score) first_class <- !type1_pres$all_pairs; second_class <- !type2_pres$all_pairs - if(type2_pres$singlet_score < type1_pres$singlet_score) { - temp = first_type; first_type = second_type; second_type = temp - first_class <- !type2_pres$all_pairs; second_class <- !type1_pres$all_pairs + + if(type2_pres$singlet_score < type1_pres$singlet_score) + { + temp = first_type; first_type = second_type + second_type = temp + first_class <- !type2_pres$all_pairs + second_class <- !type1_pres$all_pairs } + } + if(singlet_score - min_score < doublet_like_cutoff) - spot_class = "singlet" + { + spot_class <- "singlet" + } + doublet_results = decompose_sparse(cell_type_profiles, UMI_tot, bead, first_type, second_type, constrain = constrain, MIN.CHANGE = MIN.CHANGE) doublet_weights = doublet_results$weights; conv_doublet = doublet_results$converged spot_class <- factor(spot_class, c("reject", "singlet", "doublet_certain", "doublet_uncertain")) - return(list(all_weights = all_weights, spot_class = spot_class, first_type = first_type, second_type = second_type, - doublet_weights = doublet_weights, min_score = min_score, singlet_score = singlet_score, - conv_all = conv_all, conv_doublet = conv_doublet, score_mat = score_mat, singlet_scores = singlet_scores, - first_class = first_class, second_class = second_class)) + + Rx<-list(all_weights = all_weights, spot_class = spot_class, first_type = first_type, second_type = second_type, + doublet_weights = doublet_weights, min_score = min_score, singlet_score = singlet_score, + conv_all = conv_all, conv_doublet = conv_doublet, score_mat = score_mat, singlet_scores = singlet_scores, + first_class = first_class, second_class = second_class) + + return(Rx) + } #Decomposing a single bead via doublet search From 205b2c9b21fbc139f89ca0d55f425067d34a891f Mon Sep 17 00:00:00 2001 From: jpromeror <{ID}+{username}@users.noreply.github.com> Date: Fri, 8 Mar 2024 15:46:19 -0800 Subject: [PATCH 06/10] (runRCTD.R) change parallel approach using foreach and DoParallel --- R/runRCTD.R | 64 ++++++++++++++++++++++++++++++----------------------- 1 file changed, 36 insertions(+), 28 deletions(-) diff --git a/R/runRCTD.R b/R/runRCTD.R index c24e851..79eef0d 100644 --- a/R/runRCTD.R +++ b/R/runRCTD.R @@ -60,39 +60,47 @@ process_data <- function(puck, gene_list, cell_type_info, proportions = NULL, tr #' @return Returns \code{results}, a list of RCTD results for each pixel, which can be organized by #' feeding into \code{\link{gather_results}} #' @export -process_beads_batch <- function(cell_type_info, gene_list, puck, class_df = NULL, constrain = T, - MAX_CORES = 8, MIN.CHANGE = 0.001, CONFIDENCE_THRESHOLD = 10, DOUBLET_THRESHOLD = 25) { - beads = t(as.matrix(puck@counts[gene_list,])) - #out_file = "logs/process_beads_log.txt" - #if (file.exists(out_file)) - # file.remove(out_file) - if(MAX_CORES > 1) { - numCores = parallel::detectCores(); - if(parallel::detectCores() > MAX_CORES) - numCores <- MAX_CORES - cl <- parallel::makeCluster(numCores,setup_strategy = "sequential",outfile="") - doParallel::registerDoParallel(cl) - environ = c('decompose_full','decompose_sparse','solveIRWLS.weights','solveOLS','solveWLS','Q_mat','X_vals','K_val', 'SQ_mat') - results <- foreach::foreach(i = 1:(dim(beads)[1]), .export = environ) %dopar% { #.packages = c("quadprog"), - #if(i %% 100 == 0) - # cat(paste0("Finished sample: ",i,"\n"), file=out_file, append=TRUE) - assign("Q_mat",Q_mat, envir = globalenv()); assign("X_vals",X_vals, envir = globalenv()) - assign("K_val",K_val, envir = globalenv()); assign("SQ_mat",SQ_mat, envir = globalenv()); - result = process_bead_doublet(cell_type_info, gene_list, puck@nUMI[i], beads[i,], - class_df = class_df, constrain = constrain, MIN.CHANGE = MIN.CHANGE, - CONFIDENCE_THRESHOLD = CONFIDENCE_THRESHOLD, DOUBLET_THRESHOLD = DOUBLET_THRESHOLD) - result +process_beads_batch <- function(cell_type_info, gene_list, puck, class_df = NULL, constrain = T,MAX_CORES = 8, MIN.CHANGE = 0.001, CONFIDENCE_THRESHOLD = 10, DOUBLET_THRESHOLD = 25) +{ + + beads <- t(puck@counts[gene_list,]) + if(MAX_CORES > 1) + { + message('Step 3/4: fitPixels (No progress bar)') + message(paste0("Multicore enabled using ", MAX_CORES," cores")) + #registerDoMC(MAX_CORES) + registerDoParallel(cores=MAX_CORES) + NN<-nrow(beads) + + results <- foreach(i = 1:NN) %dopar% { + + assign("Q_mat",Q_mat, envir = globalenv()) + assign("X_vals",X_vals, envir = globalenv()) + assign("K_val",K_val, envir = globalenv()) + assign("SQ_mat",SQ_mat, envir = globalenv()) + + result <- process_bead_doublet(cell_type_info, gene_list, puck@nUMI[i], beads[i,], + class_df = class_df, constrain = constrain, MIN.CHANGE = MIN.CHANGE, + CONFIDENCE_THRESHOLD = CONFIDENCE_THRESHOLD, DOUBLET_THRESHOLD = DOUBLET_THRESHOLD) + + return(result) + + } - parallel::stopCluster(cl) - } else { - #not parallel + }else{ + results <- list() - for(i in 1:(dim(beads)[1])) { + + for(i in 1:(dim(beads)[1])) + { results[[i]] <- process_bead_doublet(cell_type_info, gene_list, puck@nUMI[i], beads[i,], - class_df = class_df, constrain = constrain, MIN.CHANGE = MIN.CHANGE, - CONFIDENCE_THRESHOLD = CONFIDENCE_THRESHOLD, DOUBLET_THRESHOLD = DOUBLET_THRESHOLD) + class_df = class_df, constrain = constrain, MIN.CHANGE = MIN.CHANGE, + CONFIDENCE_THRESHOLD = CONFIDENCE_THRESHOLD, DOUBLET_THRESHOLD = DOUBLET_THRESHOLD) + } + } + return(results) } From 98055eb6c93d9492cdfd7451de9d7c22138387fc Mon Sep 17 00:00:00 2001 From: jpromeror <{ID}+{username}@users.noreply.github.com> Date: Fri, 8 Mar 2024 15:49:45 -0800 Subject: [PATCH 07/10] (platform_effect_normalization.R) Replace existing function instead of creating new one --- R/platform_effect_normalization.R | 51 +------------------------------ 1 file changed, 1 insertion(+), 50 deletions(-) diff --git a/R/platform_effect_normalization.R b/R/platform_effect_normalization.R index 7a408d9..06a0fb0 100644 --- a/R/platform_effect_normalization.R +++ b/R/platform_effect_normalization.R @@ -67,54 +67,7 @@ chooseSigma <- function(prediction, counts, Q_mat_all, X_vals, sigma) { #' @return Returns an \code{\linkS4class{RCTD}} with the estimated \code{sigma_c}. #' @export choose_sigma_c <- function(RCTD) { - puck = RCTD@spatialRNA; MIN_UMI = RCTD@config$UMI_min_sigma; sigma = 100 - #Q_mat_all <- readRDS('/Users/dcable/Documents/MIT/Research/Rafalab/Projects/slideseq/Cell Demixing/ContentStructure/RCTD/Qmat/Q_mat_c.rds') - Q1 <- readRDS(system.file("extdata", "Qmat/Q_mat_1.rds", package = "spacexr")) - Q2 <- readRDS(system.file("extdata", "Qmat/Q_mat_2.rds", package = "spacexr")) - Q3 <- readRDS(system.file("extdata", "Qmat/Q_mat_3.rds", package = "spacexr")) - Q4 <- readRDS(system.file("extdata", "Qmat/Q_mat_4.rds", package = "spacexr")) - Q5 <- readRDS(system.file("extdata", "Qmat/Q_mat_5.rds", package = "spacexr")) - Q_mat_all <- c(Q1, Q2, Q3, Q4, Q5) - sigma_vals <- names(Q_mat_all) - #X_vals <- readRDS('/Users/dcable/Documents/MIT/Research/Rafalab/Projects/slideseq/Cell Demixing/ContentStructure/RCTD/Qmat/X_vals.rds') - X_vals <- readRDS(system.file("extdata", "Qmat/X_vals.rds", package = "spacexr")) - #get initial classification - N_fit = min(RCTD@config$N_fit,sum(puck@nUMI > MIN_UMI)) - if(N_fit == 0) { - stop(paste('choose_sigma_c determined a N_fit of 0! This is probably due to unusually low UMI counts per bead in your dataset. Try decreasing the parameter UMI_min_sigma. It currently is',MIN_UMI,'but none of the beads had counts larger than that.')) - } - fit_ind = sample(names(puck@nUMI[puck@nUMI > MIN_UMI]), N_fit) - beads = t(as.matrix(puck@counts[RCTD@internal_vars$gene_list_reg,fit_ind])) - message(paste('chooseSigma: using initial Q_mat with sigma = ',sigma/100)) - for(iter in 1:RCTD@config$N_epoch) { - set_likelihood_vars(Q_mat_all[[as.character(sigma)]], X_vals) - #message(paste('chooseSigma: getting initial weights for #samples: ',N_fit)) - results = decompose_batch(puck@nUMI[fit_ind], RCTD@cell_type_info$renorm[[1]], beads, RCTD@internal_vars$gene_list_reg, constrain = F, max_cores = RCTD@config$max_cores) - weights = Matrix(0, nrow = N_fit, ncol = RCTD@cell_type_info$renorm[[3]]) - rownames(weights) = fit_ind; colnames(weights) = RCTD@cell_type_info$renorm[[2]]; - for(i in 1:N_fit) - weights[i,] = results[[i]]$weights - prediction <- sweep(as.matrix(RCTD@cell_type_info$renorm[[1]][RCTD@internal_vars$gene_list_reg,]) %*% t(as.matrix(weights)), 2, puck@nUMI[fit_ind], '*') - message(paste('Likelihood value:',calc_log_l_vec(as.vector(prediction), as.vector(t(beads))))) - sigma_prev <- sigma - sigma <- chooseSigma(prediction, t(beads), Q_mat_all, X_vals, sigma) - message(paste('Sigma value: ', sigma/100)) - if(sigma == sigma_prev) - break - } - RCTD@internal_vars$sigma <- sigma/100 - RCTD@internal_vars$Q_mat <- Q_mat_all[[as.character(sigma)]] - RCTD@internal_vars$X_vals <- X_vals - return(RCTD) -} - -#' Estimates sigma_c by maximum likelihood (multi-core) -#' -#' @param RCTD an \code{\linkS4class{RCTD}} object after running the \code{\link{fitBulk}} function. -#' @return Returns an \code{\linkS4class{RCTD}} with the estimated \code{sigma_c}. -#' @export -choose_sigma_mc<-function(RCTD) -{ + message('Step 2/4: Choose Sigma') puck <- RCTD@spatialRNA MIN_UMI <- RCTD@config$UMI_min_sigma @@ -231,6 +184,4 @@ choose_sigma_mc<-function(RCTD) RCTD@internal_vars$X_vals <- X_vals return(RCTD) - - } From f5bdbe85455571dde4572687646d78a031989d16 Mon Sep 17 00:00:00 2001 From: jpromeror <{ID}+{username}@users.noreply.github.com> Date: Mon, 11 Mar 2024 16:58:08 -0700 Subject: [PATCH 08/10] (platform_effect_normalization.R) rename package to load extdata --- R/platform_effect_normalization.R | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/R/platform_effect_normalization.R b/R/platform_effect_normalization.R index 06a0fb0..a09a267 100644 --- a/R/platform_effect_normalization.R +++ b/R/platform_effect_normalization.R @@ -73,11 +73,11 @@ choose_sigma_c <- function(RCTD) { MIN_UMI <- RCTD@config$UMI_min_sigma sigma <- 100 - Q1 <- readRDS(system.file("extdata", "Qmat/Q_mat_1.rds", package = "spacexrHD")) - Q2 <- readRDS(system.file("extdata", "Qmat/Q_mat_2.rds", package = "spacexrHD")) - Q3 <- readRDS(system.file("extdata", "Qmat/Q_mat_3.rds", package = "spacexrHD")) - Q4 <- readRDS(system.file("extdata", "Qmat/Q_mat_4.rds", package = "spacexrHD")) - Q5 <- readRDS(system.file("extdata", "Qmat/Q_mat_5.rds", package = "spacexrHD")) + Q1 <- readRDS(system.file("extdata", "Qmat/Q_mat_1.rds", package = "spacexr")) + Q2 <- readRDS(system.file("extdata", "Qmat/Q_mat_2.rds", package = "spacexr")) + Q3 <- readRDS(system.file("extdata", "Qmat/Q_mat_3.rds", package = "spacexr")) + Q4 <- readRDS(system.file("extdata", "Qmat/Q_mat_4.rds", package = "spacexr")) + Q5 <- readRDS(system.file("extdata", "Qmat/Q_mat_5.rds", package = "spacexr")) Q_mat_all <- c(Q1, Q2, Q3, Q4, Q5) sigma_vals <- names(Q_mat_all) From a2cfcfd31b3e15e72a7b8c054385ecda566b07b6 Mon Sep 17 00:00:00 2001 From: jpromeror <{ID}+{username}@users.noreply.github.com> Date: Mon, 11 Mar 2024 16:58:48 -0700 Subject: [PATCH 09/10] (platform_effect_normalization.R) rename package to load extdata --- R/platform_effect_normalization.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/platform_effect_normalization.R b/R/platform_effect_normalization.R index a09a267..1ab8987 100644 --- a/R/platform_effect_normalization.R +++ b/R/platform_effect_normalization.R @@ -82,7 +82,7 @@ choose_sigma_c <- function(RCTD) { Q_mat_all <- c(Q1, Q2, Q3, Q4, Q5) sigma_vals <- names(Q_mat_all) - X_vals <- readRDS(system.file("extdata", "Qmat/X_vals.rds", package = "spacexrHD")) + X_vals <- readRDS(system.file("extdata", "Qmat/X_vals.rds", package = "spacexr")) #get initial classification N_fit = min(RCTD@config$N_fit,sum(puck@nUMI > MIN_UMI)) From 33d375cc5d7b7b5db97ee5cc1d3dd32b682afc9e Mon Sep 17 00:00:00 2001 From: jpromeror <{ID}+{username}@users.noreply.github.com> Date: Thu, 28 Mar 2024 16:26:12 -0700 Subject: [PATCH 10/10] (spacexr.R) Add MIN_OBS as parameter to create.RCTD function --- R/spacexr.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/spacexr.R b/R/spacexr.R index 9730553..861588f 100644 --- a/R/spacexr.R +++ b/R/spacexr.R @@ -41,12 +41,12 @@ process_cell_type_info <- function(reference, cell_type_names, CELL_MIN = 25) { #' @return an \code{\linkS4class{RCTD}} object, which is ready to run the \code{\link{run.RCTD}} function #' @export create.RCTD <- function(spatialRNA, reference, max_cores = 4, test_mode = FALSE, gene_cutoff = 0.000125, fc_cutoff = 0.5, gene_cutoff_reg = 0.0002, fc_cutoff_reg = 0.75, UMI_min = 100, UMI_max = 20000000, counts_MIN = 10, UMI_min_sigma = 300, - class_df = NULL, CELL_MIN_INSTANCE = 25, cell_type_names = NULL, MAX_MULTI_TYPES = 4, keep_reference = F, cell_type_profiles = NULL, CONFIDENCE_THRESHOLD = 5, DOUBLET_THRESHOLD = 20) { + class_df = NULL, CELL_MIN_INSTANCE = 25, cell_type_names = NULL, MAX_MULTI_TYPES = 4, keep_reference = F, cell_type_profiles = NULL, CONFIDENCE_THRESHOLD = 5, DOUBLET_THRESHOLD = 20, MIN_OBS = 3) { config <- list(gene_cutoff = gene_cutoff, fc_cutoff = fc_cutoff, gene_cutoff_reg = gene_cutoff_reg, fc_cutoff_reg = fc_cutoff_reg, UMI_min = UMI_min, UMI_min_sigma = UMI_min_sigma, max_cores = max_cores, N_epoch = 8, N_X = 50000, K_val = 100, N_fit = 1000, N_epoch_bulk = 30, MIN_CHANGE_BULK = 0.0001, MIN_CHANGE_REG = 0.001, UMI_max = UMI_max, counts_MIN = counts_MIN, - MIN_OBS = 3, MAX_MULTI_TYPES = MAX_MULTI_TYPES, CONFIDENCE_THRESHOLD = CONFIDENCE_THRESHOLD, DOUBLET_THRESHOLD = DOUBLET_THRESHOLD) + MIN_OBS = MIN_OBS, MAX_MULTI_TYPES = MAX_MULTI_TYPES, CONFIDENCE_THRESHOLD = CONFIDENCE_THRESHOLD, DOUBLET_THRESHOLD = DOUBLET_THRESHOLD) if(test_mode) config <- list(gene_cutoff = .00125, fc_cutoff = 0.5, gene_cutoff_reg = 0.002, fc_cutoff_reg = 0.75, UMI_min = 1000, N_epoch = 1, N_X = 50000, K_val = 100, N_fit = 50, N_epoch_bulk = 4, MIN_CHANGE_BULK = 1,