Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Speed improvements #206

Open
wants to merge 10 commits into
base: master
Choose a base branch
from
14 changes: 6 additions & 8 deletions R/IRWLS.R
Original file line number Diff line number Diff line change
Expand Up @@ -34,13 +34,11 @@ solveIRWLS.weights <-function(S,B,nUMI, OLS=FALSE, constrain = TRUE, verbose = F
#solution <- runif(length(solution))*2 / length(solution) # random initialization
names(solution) <- colnames(S)

S_mat <<- matrix(0,nrow = dim(S)[1],ncol = dim(S)[2]*(dim(S)[2] + 1)/2)
counter = 1
for(i in 1:dim(S)[2])
for(j in i:dim(S)[2]) {
S_mat[,counter] <<- S[,i] * S[,j] # depends on n^2
counter <- counter + 1
}
numCols <- ncol(S)
Index <- which(upper.tri(matrix(0, ncol = numCols, nrow = numCols), diag = TRUE), arr.ind = TRUE)
Index <- Index[order(Index[, 1], Index[, 2]), ,drop=F]
S_mat <<- S[, Index[, 1]] * S[, Index[, 2]]


iterations<-0 #now use dampened WLS, iterate weights until convergence
changes<-c()
Expand Down Expand Up @@ -83,7 +81,7 @@ solveWLS<-function(S,B,initialSol, nUMI, bulk_mode = F, constrain = F){
threshold = max(1e-4, nUMI * 1e-7)
prediction[prediction < threshold] <- threshold
gene_list = rownames(S)
derivatives <- get_der_fast(S, B, gene_list, prediction, bulk_mode = bulk_mode)
derivatives <- get_der_fast(S, B, gene_list, prediction[,1], bulk_mode = bulk_mode)
d_vec <- -derivatives$grad
D_mat <- psd(derivatives$hess)
norm_factor <- norm(D_mat,"2")
Expand Down
121 changes: 87 additions & 34 deletions R/RCTD_helper.R
Original file line number Diff line number Diff line change
Expand Up @@ -64,80 +64,133 @@ check_pairs_type <- function(cell_type_profiles, bead, UMI_tot, score_mat, min_s
}

#Decomposing a single bead via doublet search
process_bead_doublet <- function(cell_type_info, gene_list, UMI_tot, bead, class_df = NULL, constrain = T, verbose = F,
MIN.CHANGE = 0.001, CONFIDENCE_THRESHOLD = 10, DOUBLET_THRESHOLD = 25) {
process_bead_doublet <-function(cell_type_info, gene_list, UMI_tot, bead, class_df = NULL, constrain = T, verbose = F,
MIN.CHANGE = 0.001, CONFIDENCE_THRESHOLD = 10, DOUBLET_THRESHOLD = 25, OLS = FALSE,
n.iter = 50, bulk_mode = FALSE)
{

cell_type_profiles <- cell_type_info[[1]][gene_list,]
cell_type_profiles = cell_type_profiles * UMI_tot
cell_type_profiles = data.matrix(cell_type_profiles)
QL_score_cutoff = CONFIDENCE_THRESHOLD; doublet_like_cutoff = DOUBLET_THRESHOLD
results_all = decompose_full(cell_type_profiles, UMI_tot, bead, constrain = constrain, verbose = verbose, MIN_CHANGE = MIN.CHANGE)
QL_score_cutoff = CONFIDENCE_THRESHOLD
doublet_like_cutoff = DOUBLET_THRESHOLD

results_all <- solveIRWLS.weights(cell_type_profiles,bead,UMI_tot,OLS = OLS, constrain = constrain,
verbose = verbose, n.iter = n.iter, MIN_CHANGE = MIN.CHANGE, bulk_mode = bulk_mode)

all_weights <- results_all$weights
conv_all <- results_all$converged
initial_weight_thresh = 0.01; cell_type_names = cell_type_info[[2]]
initial_weight_thresh = 0.01
cell_type_names = cell_type_info[[2]]
candidates <- names(which(all_weights > initial_weight_thresh))

if(length(candidates) == 0)
{
candidates = cell_type_info[[2]][1:min(3,cell_type_info[[3]])]
if(length(candidates) == 1)

}else if(length(candidates) == 1)
{
if(candidates[1] == cell_type_info[[2]][1])
{
candidates = c(candidates, cell_type_info[[2]][2])
else

}else{

candidates = c(candidates, cell_type_info[[2]][1])
}

}

score_mat = Matrix(0, nrow = length(candidates), ncol = length(candidates))
rownames(score_mat) = candidates; colnames(score_mat) = candidates
rownames(score_mat) = candidates
colnames(score_mat) = candidates
singlet_scores <- numeric(length(candidates))
names(singlet_scores) <- candidates
for(type in candidates) {

for(type in candidates)
{

singlet_scores[type] <- get_singlet_score(cell_type_profiles, bead, UMI_tot,
type, constrain, MIN.CHANGE = MIN.CHANGE)
}


min_score = 0
first_type = NULL; second_type = NULL
first_class = F; second_class = F #indicates whether the first (resp second) refers to a class rather than a type
for(i in 1:(length(candidates)-1)) {
type1 = candidates[i]
for(j in (i+1):length(candidates)) {
type2 = candidates[j]
score = decompose_sparse(cell_type_profiles, UMI_tot, bead, type1, type2, score_mode = T, constrain = constrain, verbose = verbose, MIN.CHANGE = MIN.CHANGE)
score_mat[i,j] = score; score_mat[j,i] = score
if(is.null(second_type) || score < min_score) {
first_type <- type1; second_type <- type2
min_score = score
first_class = F
second_class = F #indicates whether the first (resp second) refers to a class rather than a type

for(i in 1:(length(candidates)-1))
{
type1 <- candidates[i]

for(j in (i+1):length(candidates))
{
type2 <- candidates[j]
score <- decompose_sparse(cell_type_profiles, UMI_tot, bead, type1, type2, score_mode = T, constrain = constrain, verbose = verbose, MIN.CHANGE = MIN.CHANGE)
score_mat[i,j] <- score
score_mat[j,i]<- score

if(is.null(second_type) || score < min_score)
{
first_type <- type1
second_type <- type2
min_score <- score
}
}

}
type1_pres = check_pairs_type(cell_type_profiles, bead, UMI_tot, score_mat, min_score, first_type, class_df, QL_score_cutoff, constrain, singlet_scores, MIN.CHANGE = MIN.CHANGE)
type2_pres = check_pairs_type(cell_type_profiles, bead, UMI_tot, score_mat, min_score, second_type, class_df, QL_score_cutoff, constrain, singlet_scores, MIN.CHANGE = MIN.CHANGE)
if(!type1_pres$all_pairs_class && !type2_pres$all_pairs_class) {

type1_pres <- check_pairs_type(cell_type_profiles, bead, UMI_tot, score_mat, min_score, first_type, class_df, QL_score_cutoff, constrain, singlet_scores, MIN.CHANGE = MIN.CHANGE)
type2_pres <- check_pairs_type(cell_type_profiles, bead, UMI_tot, score_mat, min_score, second_type, class_df, QL_score_cutoff, constrain, singlet_scores, MIN.CHANGE = MIN.CHANGE)
if(!type1_pres$all_pairs_class && !type2_pres$all_pairs_class)
{
spot_class <- "reject"
singlet_score = min_score + 2 * doublet_like_cutoff #arbitrary
}
else if(type1_pres$all_pairs_class && !type2_pres$all_pairs_class) {

}else if(type1_pres$all_pairs_class && !type2_pres$all_pairs_class)
{
first_class <- !type1_pres$all_pairs
singlet_score = type1_pres$singlet_score
spot_class = "doublet_uncertain"
} else if(!type1_pres$all_pairs_class && type2_pres$all_pairs_class) {

}else if(!type1_pres$all_pairs_class && type2_pres$all_pairs_class)
{
first_class <- !type2_pres$all_pairs
singlet_score = type2_pres$singlet_score
temp = first_type; first_type = second_type; second_type = temp
spot_class = "doublet_uncertain"
} else {
}else{
spot_class = "doublet_certain"
singlet_score = min(type1_pres$singlet_score, type2_pres$singlet_score)
first_class <- !type1_pres$all_pairs; second_class <- !type2_pres$all_pairs
if(type2_pres$singlet_score < type1_pres$singlet_score) {
temp = first_type; first_type = second_type; second_type = temp
first_class <- !type2_pres$all_pairs; second_class <- !type1_pres$all_pairs

if(type2_pres$singlet_score < type1_pres$singlet_score)
{
temp = first_type; first_type = second_type
second_type = temp
first_class <- !type2_pres$all_pairs
second_class <- !type1_pres$all_pairs
}

}

if(singlet_score - min_score < doublet_like_cutoff)
spot_class = "singlet"
{
spot_class <- "singlet"
}

doublet_results = decompose_sparse(cell_type_profiles, UMI_tot, bead, first_type, second_type, constrain = constrain, MIN.CHANGE = MIN.CHANGE)
doublet_weights = doublet_results$weights; conv_doublet = doublet_results$converged
spot_class <- factor(spot_class, c("reject", "singlet", "doublet_certain", "doublet_uncertain"))
return(list(all_weights = all_weights, spot_class = spot_class, first_type = first_type, second_type = second_type,
doublet_weights = doublet_weights, min_score = min_score, singlet_score = singlet_score,
conv_all = conv_all, conv_doublet = conv_doublet, score_mat = score_mat, singlet_scores = singlet_scores,
first_class = first_class, second_class = second_class))

Rx<-list(all_weights = all_weights, spot_class = spot_class, first_type = first_type, second_type = second_type,
doublet_weights = doublet_weights, min_score = min_score, singlet_score = singlet_score,
conv_all = conv_all, conv_doublet = conv_doublet, score_mat = score_mat, singlet_scores = singlet_scores,
first_class = first_class, second_class = second_class)

return(Rx)

}

#Decomposing a single bead via doublet search
Expand Down
108 changes: 93 additions & 15 deletions R/platform_effect_normalization.R
Original file line number Diff line number Diff line change
Expand Up @@ -67,43 +67,121 @@ chooseSigma <- function(prediction, counts, Q_mat_all, X_vals, sigma) {
#' @return Returns an \code{\linkS4class{RCTD}} with the estimated \code{sigma_c}.
#' @export
choose_sigma_c <- function(RCTD) {
puck = RCTD@spatialRNA; MIN_UMI = RCTD@config$UMI_min_sigma; sigma = 100
#Q_mat_all <- readRDS('/Users/dcable/Documents/MIT/Research/Rafalab/Projects/slideseq/Cell Demixing/ContentStructure/RCTD/Qmat/Q_mat_c.rds')

message('Step 2/4: Choose Sigma')
puck <- RCTD@spatialRNA
MIN_UMI <- RCTD@config$UMI_min_sigma
sigma <- 100

Q1 <- readRDS(system.file("extdata", "Qmat/Q_mat_1.rds", package = "spacexr"))
Q2 <- readRDS(system.file("extdata", "Qmat/Q_mat_2.rds", package = "spacexr"))
Q3 <- readRDS(system.file("extdata", "Qmat/Q_mat_3.rds", package = "spacexr"))
Q4 <- readRDS(system.file("extdata", "Qmat/Q_mat_4.rds", package = "spacexr"))
Q5 <- readRDS(system.file("extdata", "Qmat/Q_mat_5.rds", package = "spacexr"))

Q_mat_all <- c(Q1, Q2, Q3, Q4, Q5)
sigma_vals <- names(Q_mat_all)
#X_vals <- readRDS('/Users/dcable/Documents/MIT/Research/Rafalab/Projects/slideseq/Cell Demixing/ContentStructure/RCTD/Qmat/X_vals.rds')

X_vals <- readRDS(system.file("extdata", "Qmat/X_vals.rds", package = "spacexr"))

#get initial classification
N_fit = min(RCTD@config$N_fit,sum(puck@nUMI > MIN_UMI))
if(N_fit == 0) {
stop(paste('choose_sigma_c determined a N_fit of 0! This is probably due to unusually low UMI counts per bead in your dataset. Try decreasing the parameter UMI_min_sigma. It currently is',MIN_UMI,'but none of the beads had counts larger than that.'))
stop(paste('choose_sigma_c determined a N_fit of 0! This is probably due to unusually low UMI counts per bead in your dataset. Try decreasing the parameter UMI_min_sigma. It currently is',MIN_UMI,'but none of the beads had counts larger than that.'))
}

fit_ind = sample(names(puck@nUMI[puck@nUMI > MIN_UMI]), N_fit)
beads = t(as.matrix(puck@counts[RCTD@internal_vars$gene_list_reg,fit_ind]))
message(paste('chooseSigma: using initial Q_mat with sigma = ',sigma/100))
for(iter in 1:RCTD@config$N_epoch) {
beads = t(puck@counts[RCTD@internal_vars$gene_list_reg,fit_ind])

#message(paste('chooseSigma: using initial Q_mat with sigma = ',sigma/100))
#print(paste0("N_epoch: ",RCTD@config$N_epoch))

nUMI <- puck@nUMI[fit_ind]
cell_type_means <- RCTD@cell_type_info$renorm[[1]]
gene_list <- RCTD@internal_vars$gene_list_reg
constrain <- FALSE
max_cores <- RCTD@config$max_cores

if(max_cores > 1)
{
message(paste0("Multicore enabled using ", max_cores," cores"))
registerDoParallel(cores=max_cores)
}

NN<-nrow(beads)
pb <- txtProgressBar(min = 0, max = RCTD@config$N_epoch, style = 3)

for(iter in 1:RCTD@config$N_epoch)
{
set_likelihood_vars(Q_mat_all[[as.character(sigma)]], X_vals)
#message(paste('chooseSigma: getting initial weights for #samples: ',N_fit))
results = decompose_batch(puck@nUMI[fit_ind], RCTD@cell_type_info$renorm[[1]], beads, RCTD@internal_vars$gene_list_reg, constrain = F, max_cores = RCTD@config$max_cores)
weights = Matrix(0, nrow = N_fit, ncol = RCTD@cell_type_info$renorm[[3]])
rownames(weights) = fit_ind; colnames(weights) = RCTD@cell_type_info$renorm[[2]];
for(i in 1:N_fit)
weights[i,] = results[[i]]$weights

if(max_cores>1)
{

results<- foreach(i = 1:NN) %dopar% {

#set_likelihood_vars(Q_mat_all[[as.character(sigma)]], X_vals)
weights <- solveIRWLS.weights(data.matrix(RCTD@cell_type_info$renorm[[1]][RCTD@internal_vars$gene_list_reg,]*nUMI[i]),
beads[i,],
nUMI[i],
OLS = FALSE,
constrain = FALSE,
verbose = FALSE,
n.iter = 50,
MIN_CHANGE = 0.001,
bulk_mode = FALSE)

return(weights)
}


}else{

results<-vector("list",length=nrow(beads))

for(i in 1:nrow(beads))
{
set_likelihood_vars(Q_mat_all[[as.character(sigma)]], X_vals)
weights <- solveIRWLS.weights(data.matrix(RCTD@cell_type_info$renorm[[1]][RCTD@internal_vars$gene_list_reg,]*nUMI[i]),
beads[i,],
nUMI[i],
OLS = FALSE,
constrain = FALSE,
verbose = FALSE,
n.iter = 50,
MIN_CHANGE = 0.001,
bulk_mode = FALSE)
results[[i]]<-weights

}


}

weights<- do.call(rbind,lapply(results,function(X){return(X$weights)}))
weights<-as(weights,"dgCMatrix")
rownames(weights) <- fit_ind
colnames(weights) <- RCTD@cell_type_info$renorm[[2]]
prediction <- sweep(as.matrix(RCTD@cell_type_info$renorm[[1]][RCTD@internal_vars$gene_list_reg,]) %*% t(as.matrix(weights)), 2, puck@nUMI[fit_ind], '*')
message(paste('Likelihood value:',calc_log_l_vec(as.vector(prediction), as.vector(t(beads)))))
#message(paste('Likelihood value:',calc_log_l_vec(as.vector(prediction), as.vector(t(beads)))))
sigma_prev <- sigma
sigma <- chooseSigma(prediction, t(beads), Q_mat_all, X_vals, sigma)
message(paste('Sigma value: ', sigma/100))

if(sigma == sigma_prev)
{
message(paste0(RCTD@config$N_epoch,"/",RCTD@config$N_epoch))
break
}

setTxtProgressBar(pb, iter)
}

setTxtProgressBar(pb, iter)

close(pb)
RCTD@internal_vars$sigma <- sigma/100
RCTD@internal_vars$Q_mat <- Q_mat_all[[as.character(sigma)]]
RCTD@internal_vars$X_vals <- X_vals

return(RCTD)
}
Loading