add compared methods

YangLabHKUST · Jun 20, 2023 · d77ed81 · d77ed81
1 parent 3dcbfbb
commit d77ed81
Show file tree

Hide file tree

Showing 11 changed files with 347 additions and 16 deletions.
diff --git a/compared_methods/Cell2location_pipeline.py b/compared_methods/Cell2location_pipeline.py
@@ -9,6 +9,8 @@
 
 import os
 
+os.environ["CUDA_VISIBLE_DEVICES"] = '0'
+
 import cell2location
 import scvi
 

diff --git a/compared_methods/CytoSPACE_pipeline.py b/compared_methods/CytoSPACE_pipeline.py
@@ -34,6 +34,18 @@
 adata_st.var_names_make_unique()
 adata_st.obs_names_make_unique()
 
+if adata_st.X.max()<30:
+ try:
+ adata_st.X = np.exp(adata_st.X) - 1
+ except:
+ adata_st.X = np.exp(adata_st.X.toarray()) - 1
+
+if adata_sc.X.max()<30:
+ try:
+ adata_sc.X = np.exp(adata_sc.X) - 1
+ except:
+ adata_sc.X = np.exp(adata_sc.X.toarray()) - 1
+
 sel_genes = list(set(adata_st.var.index.values)&set(adata_sc.var.index.values))
 
 adata_sc = adata_sc[:,sel_genes]
@@ -64,8 +76,12 @@
 sp_express_df.to_csv(sp_express_df_file_name,sep='\t')
 
 
-adata_st.obs['row'] = adata_st.obs['X']
-adata_st.obs['col'] = adata_st.obs['Y']
+try:
+ adata_st.obs['row'] = adata_st.obs['X']
+ adata_st.obs['col'] = adata_st.obs['Y']
+except:
+ adata_st.obs['row'] = adata_st.obs['x']
+ adata_st.obs['col'] = adata_st.obs['y']
 sp_coord_df = adata_st.obs[['row','col']]
 sp_coord_df.index.name = 'SpotID'
 sp_coord_df_file = tempfile.NamedTemporaryFile(suffix='.txt')
@@ -100,12 +116,14 @@
 df['cell_type_cytospace'] = df['cytospace_result'].apply(lambda x:x.split(':')[0])
 df['cell_index_cytospace'] = df['cytospace_result'].apply(lambda x:x.split(':')[1])
 
-df.to_csv('/'.join(output_file_path.split('/')[:-1]) + '/CytoSPACE_SinglecellMapping_result.txt')
+#df.to_csv('/'.join(output_file_path.split('/')[:-1]) + '/CytoSPACE_SinglecellMapping_result.txt')
+df.to_csv(output_file_path + '/CytoSPACE_SinglecellMapping_result.txt')
 
 ss_res = df.copy()
 ss_res = pd.pivot_table(ss_res[['spot_index','cell_type_cytospace']],index=['spot_index'],columns=['cell_type_cytospace'], aggfunc=len, fill_value=0).reset_index()
 ss_res = ss_res.set_index("spot_index")
 ss_res = pd.DataFrame(ss_res.values/ss_res.values.sum(1)[:,None],columns=ss_res.columns,index=ss_res.index)
 ss_res = adata_st.obs[[]].merge(ss_res,left_index=True,right_index=True,how='left')
 ss_res = ss_res.fillna(0)
-ss_res.to_csv('/'.join(output_file_path.split('/')[:-1]) + '/CytoSPACE_result.txt')
+#ss_res.to_csv('/'.join(output_file_path.split('/')[:-1]) + '/CytoSPACE_result.txt')
+ss_res.to_csv(output_file_path + '/CytoSPACE_result.txt')
diff --git a/compared_methods/DSTG_pipeline.sh b/compared_methods/DSTG_pipeline.sh
@@ -0,0 +1,28 @@
+set -ex
+
+scrna_path=$1
+spatial_path=$2
+celltype_key=$3
+output_path=$4
+
+
+echo $scrna_path
+echo $spatial_path
+echo $celltype_key
+
+cd /home/jxiaoae/spatial/revision/SpatialBenchmarking/Codes/Deconvolution/DSTG/DSTG
+
+Rscript convert_data_h5ad.r $scrna_path $celltype_key $spatial_path
+
+Rscript convert_data.R scRNAseq_data.RDS spatial_data.RDS scRNAseq_label.RDS 
+
+python train.py
+
+cp ./DSTG_Result/predict_output.csv $output_path/DSTG_output.csv
+
+# ./DSTG_pipeline.sh \
+# /home/share/xwanaf/sour_sep/benchmarking/Decon/MERFISH/MOp3_16x58_x1.0/sc_rna.h5seurat \
+# /home/share/xwanaf/sour_sep/benchmarking/Decon/MERFISH/MOp3_16x58_x1.0/spatial.h5seurat \
+# cell_type
+
+# ~/spatial/SpatialScope/compared_methods/DSTG/DSTG/DSTG_Result/predict_output.csv
diff --git a/compared_methods/DestVI_pipeline.py b/compared_methods/DestVI_pipeline.py
@@ -0,0 +1,66 @@
+import scanpy as sc
+import numpy as np
+import pandas as pd
+
+import scvi
+from scvi.model import CondSCVI, DestVI
+
+import sys
+import os
+os.environ["CUDA_VISIBLE_DEVICES"] = '0'
+
+# python DestVI_pipeline.py \
+# /home/share/xwanaf/sour_sep/benchmarking/Decon/MERFISH/MOp3_16x58_x1.0/sc_rna.h5ad \
+# /home/share/xwanaf/sour_sep/benchmarking/Decon/MERFISH/MOp3_16x58_x1.0/spatial.h5ad \
+# cell_type \
+# ./test
+
+scrna_path = sys.argv[1]
+spatial_path = sys.argv[2]
+celltype_key = sys.argv[3]
+output_path = sys.argv[4]
+
+sc_adata = sc.read_h5ad(scrna_path)
+st_adata = sc.read_h5ad(spatial_path)
+
+# filter genes to be the same on the spatial data
+intersect = np.intersect1d(sc_adata.var_names, st_adata.var_names)
+st_adata = st_adata[:, intersect].copy()
+sc_adata = sc_adata[:, intersect].copy()
+G = len(intersect)
+
+# let us filter some genes
+G = 2000
+sc.pp.filter_genes(sc_adata, min_counts=10)
+
+sc_adata.layers["counts"] = sc_adata.X.copy()
+
+sc.pp.highly_variable_genes(
+ sc_adata,
+ n_top_genes=G,
+ subset=True,
+ layer="counts",
+ flavor="seurat_v3"
+)
+
+sc.pp.normalize_total(sc_adata, target_sum=10e4)
+sc.pp.log1p(sc_adata)
+sc_adata.raw = sc_adata
+st_adata.layers["counts"] = st_adata.X.copy()
+sc.pp.normalize_total(st_adata, target_sum=10e4)
+sc.pp.log1p(st_adata)
+st_adata.raw = st_adata
+# filter genes to be the same on the spatial data
+intersect = np.intersect1d(sc_adata.var_names, st_adata.var_names)
+st_adata = st_adata[:, intersect].copy()
+sc_adata = sc_adata[:, intersect].copy()
+G = len(intersect)
+CondSCVI.setup_anndata(sc_adata, layer="counts", labels_key=celltype_key)
+sc_model = CondSCVI(sc_adata, weight_obs=True)
+sc_model.train(max_epochs=250,lr=0.0001)
+sc_model.history["elbo_train"].plot()
+DestVI.setup_anndata(st_adata, layer="counts")
+st_model = DestVI.from_rna_model(st_adata, sc_model)
+st_model.train(max_epochs=2500)
+st_model.history["elbo_train"].plot()
+st_model.get_proportions().to_csv(output_path + '/DestVI_result.txt')
diff --git a/compared_methods/SPOTlight_pipeline.r b/compared_methods/SPOTlight_pipeline.r
@@ -0,0 +1,53 @@
+library(Matrix)
+library(data.table)
+library(Seurat)
+library(dplyr)
+library(SPOTlight)
+library(igraph)
+library(RColorBrewer)
+library(SeuratDisk)
+library(future)
+# check the current active plan
+plan("multiprocess", workers = 8)
+options(future.globals.maxSize= 3*1024*1024^2)
+
+args<-commandArgs(T)
+scrna_path = args[1]
+spatial_path = args[2]
+celltype_final = args[3]
+output_path = args[4]
+
+sc <- LoadH5Seurat(scrna_path)
+st <- LoadH5Seurat(spatial_path,meta.data=F)
+
+set.seed(123)
+sc <- Seurat::SCTransform(sc, verbose = FALSE)
+
+Idents(sc) <- sc@meta.data[,celltype_final]
+
+cluster_markers_all <- FindAllMarkers(object = sc, 
+ assay = "SCT",
+ slot = "data",
+ verbose = TRUE, 
+ only.pos = TRUE)
+
+spotlight_ls <- spotlight_deconvolution(
+ se_sc = sc,
+ counts_spatial = st@assays$RNA@counts,
+ clust_vr = celltype_final, # Variable in sc_seu containing the cell-type annotation
+ cluster_markers = cluster_markers_all, # Dataframe with the marker genes
+ cl_n = 100, # number of cells per cell type to use
+ hvg = 3000, # Number of HVG to use
+ ntop = NULL, # How many of the marker genes to use (by default all)
+ transf = "uv", # Perform unit-variance scaling per cell and spot prior to factorzation and NLS
+ method = "nsNMF", # Factorization method
+ min_cont = 0 # Remove those cells contributing to a spot below a certain threshold 
+ )
+decon_mtrx <- spotlight_ls[[2]]
+write.csv(decon_mtrx[,which(colnames(decon_mtrx) != "res_ss")], paste0(output_path, '/SPOTlight_result.txt'))
+
+# Rscript SPOTlight_pipeline.r \
+# /home/share/xwanaf/sour_sep/benchmarking/Decon/MERFISH/MOp3_16x58_x1.0/sc_rna.h5seurat \
+# /home/share/xwanaf/sour_sep/benchmarking/Decon/MERFISH/MOp3_16x58_x1.0/spatial.h5seurat \
+# cell_type \
+# ./test
diff --git a/compared_methods/STRIDE_pipeline.sh b/compared_methods/STRIDE_pipeline.sh
@@ -0,0 +1,30 @@
+set -ex
+
+scrna_path=$1
+spatial_path=$2
+celltype_key=$3
+output_path=$4
+
+prefix='STRIDE'
+
+
+python /home/jxiaoae/spatial/revision/SpatialBenchmarking/Codes/Deconvolution/STRIDE/covert_data.py $scrna_path $celltype_key $spatial_path
+
+topics=`awk '{print $2}' /home/jxiaoae/spatial/revision/SpatialBenchmarking/Codes/Deconvolution/STRIDE/scRNAseq_label.csv | sort | uniq | wc -l`
+
+echo $topics
+
+STRIDE deconvolve --sc-count /home/jxiaoae/spatial/revision/SpatialBenchmarking/Codes/Deconvolution/STRIDE/scRNAseq_data.csv \
+--sc-celltype /home/jxiaoae/spatial/revision/SpatialBenchmarking/Codes/Deconvolution/STRIDE/scRNAseq_label.csv \
+--st-count /home/jxiaoae/spatial/revision/SpatialBenchmarking/Codes/Deconvolution/STRIDE/spatial_data.csv \
+--outdir $output_path --outprefix $prefix --normalize --gene-use All --st-scale-factor 300 --sc-scale-factor 300 --ntopics $topics
+
+# ./STRIDE_pipeline.sh \
+# /home/share/xwanaf/sour_sep/benchmarking/Decon/MERFISH/MOp3_16x58_x1.0/sc_rna.h5ad \
+# /home/share/xwanaf/sour_sep/benchmarking/Decon/MERFISH/MOp3_16x58_x1.0/spatial.h5ad \
+# cell_type \
+# ./test
+# /home/jxiaoae/spatial/revision/SpatialBenchmarking/Codes/Deconvolution/STRIDE_pipeline.sh \
+# /home/share/xwanaf/sour_sep/benchmarking/Decon/MERFISH/MOp3_16x58_x1.0/sc_rna.h5ad \
+# /home/share/xwanaf/sour_sep/revision/data/RCTDnv_MOp/260UMI/spatial.h5ad cell_type \
+# /home/share/xiaojs/spatial/benchmarking/MOp_260UMI
diff --git a/compared_methods/SpaOTsc_pipeline.py b/compared_methods/SpaOTsc_pipeline.py
@@ -0,0 +1,57 @@
+# basic imports
+import scanpy as sc
+import pandas as pd
+import numpy as np
+from scipy.spatial import distance_matrix
+from spaotsc import SpaOTsc
+from scipy import stats
+import tangram as tg
+import sys
+# Example
+# python ~/bio/SpatialBenchmarking/Codes/Deconvolution/SpaOTsc_pipeline.py \
+# /home/share/xiaojs/spatial/sour_sep/mouce_brain_VISp/Ref_scRNA_VISp_qc2.h5ad \
+# /home/share/xiaojs/spatial/sour_sep/tangram/merfish/MERFISH_mop.h5ad \
+# cell_subclass \
+# /home/share/xwanaf/sour_sep/simulation/SpaOTsc_test
+
+
+sc_file_path = sys.argv[1]
+spatial_file_path = sys.argv[2]
+celltype_key = sys.argv[3]
+output_file_path = sys.argv[4]
+
+ad_sc = sc.read(sc_file_path)
+ad_sp = sc.read(spatial_file_path)
+df_sc = ad_sc.to_df()
+df_IS = ad_sp.to_df()
+
+try:
+ pts = ad_sp.obs[['X','Y']].values
+except:
+ pts = ad_sp.obs[['x','y']].values
+is_dmat = distance_matrix(pts, pts)
+
+df_is=df_IS
+
+gene_is=df_is.columns.tolist()
+gene_sc=df_sc.columns.tolist()
+gene_overloap=list(set(gene_is).intersection(gene_sc))
+a=df_is[gene_overloap]
+b=df_sc[gene_overloap]
+
+
+rho, pval = stats.spearmanr(a, b,axis=1)
+rho[np.isnan(rho)]=0
+mcc=rho[-(len(df_sc)):,0:len(df_is)]
+C = np.exp(1-mcc)
+
+issc = SpaOTsc.spatial_sc(sc_data=df_sc, is_data=df_is, is_dmat = is_dmat)
+
+issc.transport_plan(C**2, alpha=0, rho=1.0, epsilon=1.0, cor_matrix=mcc, scaling=False)
+gamma = issc.gamma_mapping
+for j in range(gamma.shape[1]):
+ gamma[:,j] = gamma[:,j]/np.sum(gamma[:,j])
+ad_map = sc.AnnData(gamma,obs = ad_sc.obs, var=ad_sp.obs)
+tg.project_cell_annotations(ad_map, ad_sp, annotation=celltype_key)
+ad_sp.obsm['tangram_ct_pred'].to_csv(output_file_path + '/SpaOTsc_decon.csv')
+
diff --git a/compared_methods/TangramCell_pipeline.py b/compared_methods/TangramCell_pipeline.py
@@ -82,7 +82,14 @@ def f7(seq):
 cell_locations = ad_sp.uns['cell_locations'].copy()
 
 
-
+if 'x' in ad_sp.obs.columns:
+ ad_sp.obs['X'] = ad_sp.obs['x']
+ ad_sp.obs['Y'] = ad_sp.obs['y']
+ cell_locations['X'] = cell_locations['x']
+ cell_locations['Y'] = cell_locations['y']
+if 'cell_nums' in ad_sp.obs.columns:
+ ad_sp.obs['cell_num'] = ad_sp.obs['cell_nums']
+ cell_locations['cell_num'] = cell_locations['cell_nums']
 
 ad_sp.obsm['spatial'] = ad_sp.obs[['X','Y']].values
 ad_sp_obsm_image_features = pd.DataFrame()

diff --git a/compared_methods/novoSpaRc_pipeline.py b/compared_methods/novoSpaRc_pipeline.py
@@ -0,0 +1,68 @@
+# basic imports
+import scanpy as sc
+import pandas as pd
+import numpy as np
+from scipy.spatial import distance_matrix
+import novosparc as nc
+from scipy import stats
+import tangram as tg
+from scipy.spatial.distance import cdist
+import sys
+
+# Example
+# python ~/bio/SpatialBenchmarking/Codes/Deconvolution/novoSpaRc_pipeline.py \
+# /home/share/xiaojs/spatial/sour_sep/mouce_brain_VISp/Ref_scRNA_VISp_qc2.h5ad \
+# /home/share/xiaojs/spatial/sour_sep/tangram/merfish/MERFISH_mop.h5ad \
+# cell_subclass \
+# /home/share/xwanaf/sour_sep/simulation/novoSpaRc_test
+
+
+sc_file_path = sys.argv[1]
+spatial_file_path = sys.argv[2]
+celltype_key = sys.argv[3]
+output_file_path = sys.argv[4]
+
+ad_sc = sc.read(sc_file_path)
+ad_sp = sc.read(spatial_file_path)
+gene_names = np.array(ad_sc.var.index.values)
+dge = ad_sc.to_df().values
+num_cells = dge.shape[0]
+print ('number of cells and genes in the matrix:', dge.shape)
+
+hvg = np.argsort(np.divide(np.var(dge,axis=0),np.mean(dge,axis=0)+0.0001))
+dge_hvg = dge[:,hvg[-2000:]]
+
+try:
+ locations = ad_sp.obs[['X','Y']].values
+except:
+ locations = ad_sp.obs[['x','y']].values
+
+num_locations = locations.shape[0]
+
+p_location, p_expression = nc.rc.create_space_distributions(num_locations, num_cells)
+cost_expression, cost_locations = nc.rc.setup_for_OT_reconstruction(dge_hvg,locations,num_neighbors_source = 5,num_neighbors_target = 5)
+
+gene_is=ad_sp.var.index.tolist()
+gene_sc=ad_sc.var.index.tolist()
+insitu_genes=list(set(gene_is).intersection(gene_sc))
+
+markers_in_sc = np.array([], dtype='int')
+for marker in insitu_genes:
+ marker_index = np.where(gene_names == marker)[0]
+ if len(marker_index) > 0:
+ markers_in_sc = np.append(markers_in_sc, marker_index[0])
+
+insitu_matrix = np.array(ad_sp.to_df()[insitu_genes])
+cost_marker_genes = cdist(dge[:, markers_in_sc]/np.amax(dge[:, markers_in_sc]),insitu_matrix/np.amax(insitu_matrix))
+
+alpha_linear = 0.5
+gw = nc.rc._GWadjusted.gromov_wasserstein_adjusted_norm(cost_marker_genes, cost_expression, cost_locations,alpha_linear, p_expression, p_location,'square_loss', epsilon=5e-3, verbose=True)
+
+gamma = gw
+for j in range(gamma.shape[1]):
+ gamma[:,j] = gamma[:,j]/np.sum(gamma[:,j])
+ad_map = sc.AnnData(gamma,obs = ad_sc.obs, var=ad_sp.obs)
+tg.project_cell_annotations(ad_map, ad_sp, annotation=celltype_key)
+
+ad_sp.obsm['tangram_ct_pred'].to_csv(output_file_path + '/novoSpaRc_decon.csv')
+