Skip to content

Commit

Permalink
add compared methods
Browse files Browse the repository at this point in the history
  • Loading branch information
JiaShun-Xiao committed Jun 20, 2023
1 parent 3dcbfbb commit d77ed81
Show file tree
Hide file tree
Showing 11 changed files with 347 additions and 16 deletions.
2 changes: 2 additions & 0 deletions compared_methods/Cell2location_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@

import os

os.environ["CUDA_VISIBLE_DEVICES"] = '0'

import cell2location
import scvi

Expand Down
26 changes: 22 additions & 4 deletions compared_methods/CytoSPACE_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,18 @@
adata_st.var_names_make_unique()
adata_st.obs_names_make_unique()

if adata_st.X.max()<30:
try:
adata_st.X = np.exp(adata_st.X) - 1
except:
adata_st.X = np.exp(adata_st.X.toarray()) - 1

if adata_sc.X.max()<30:
try:
adata_sc.X = np.exp(adata_sc.X) - 1
except:
adata_sc.X = np.exp(adata_sc.X.toarray()) - 1

sel_genes = list(set(adata_st.var.index.values)&set(adata_sc.var.index.values))

adata_sc = adata_sc[:,sel_genes]
Expand Down Expand Up @@ -64,8 +76,12 @@
sp_express_df.to_csv(sp_express_df_file_name,sep='\t')


adata_st.obs['row'] = adata_st.obs['X']
adata_st.obs['col'] = adata_st.obs['Y']
try:
adata_st.obs['row'] = adata_st.obs['X']
adata_st.obs['col'] = adata_st.obs['Y']
except:
adata_st.obs['row'] = adata_st.obs['x']
adata_st.obs['col'] = adata_st.obs['y']
sp_coord_df = adata_st.obs[['row','col']]
sp_coord_df.index.name = 'SpotID'
sp_coord_df_file = tempfile.NamedTemporaryFile(suffix='.txt')
Expand Down Expand Up @@ -100,12 +116,14 @@
df['cell_type_cytospace'] = df['cytospace_result'].apply(lambda x:x.split(':')[0])
df['cell_index_cytospace'] = df['cytospace_result'].apply(lambda x:x.split(':')[1])

df.to_csv('/'.join(output_file_path.split('/')[:-1]) + '/CytoSPACE_SinglecellMapping_result.txt')
#df.to_csv('/'.join(output_file_path.split('/')[:-1]) + '/CytoSPACE_SinglecellMapping_result.txt')
df.to_csv(output_file_path + '/CytoSPACE_SinglecellMapping_result.txt')

ss_res = df.copy()
ss_res = pd.pivot_table(ss_res[['spot_index','cell_type_cytospace']],index=['spot_index'],columns=['cell_type_cytospace'], aggfunc=len, fill_value=0).reset_index()
ss_res = ss_res.set_index("spot_index")
ss_res = pd.DataFrame(ss_res.values/ss_res.values.sum(1)[:,None],columns=ss_res.columns,index=ss_res.index)
ss_res = adata_st.obs[[]].merge(ss_res,left_index=True,right_index=True,how='left')
ss_res = ss_res.fillna(0)
ss_res.to_csv('/'.join(output_file_path.split('/')[:-1]) + '/CytoSPACE_result.txt')
#ss_res.to_csv('/'.join(output_file_path.split('/')[:-1]) + '/CytoSPACE_result.txt')
ss_res.to_csv(output_file_path + '/CytoSPACE_result.txt')
28 changes: 28 additions & 0 deletions compared_methods/DSTG_pipeline.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
set -ex

scrna_path=$1
spatial_path=$2
celltype_key=$3
output_path=$4


echo $scrna_path
echo $spatial_path
echo $celltype_key

cd /home/jxiaoae/spatial/revision/SpatialBenchmarking/Codes/Deconvolution/DSTG/DSTG

Rscript convert_data_h5ad.r $scrna_path $celltype_key $spatial_path

Rscript convert_data.R scRNAseq_data.RDS spatial_data.RDS scRNAseq_label.RDS

python train.py

cp ./DSTG_Result/predict_output.csv $output_path/DSTG_output.csv

# ./DSTG_pipeline.sh \
# /home/share/xwanaf/sour_sep/benchmarking/Decon/MERFISH/MOp3_16x58_x1.0/sc_rna.h5seurat \
# /home/share/xwanaf/sour_sep/benchmarking/Decon/MERFISH/MOp3_16x58_x1.0/spatial.h5seurat \
# cell_type

# ~/spatial/SpatialScope/compared_methods/DSTG/DSTG/DSTG_Result/predict_output.csv
66 changes: 66 additions & 0 deletions compared_methods/DestVI_pipeline.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
import scanpy as sc
import numpy as np
import pandas as pd

import scvi
from scvi.model import CondSCVI, DestVI

import sys
import os
os.environ["CUDA_VISIBLE_DEVICES"] = '0'

# python DestVI_pipeline.py \
# /home/share/xwanaf/sour_sep/benchmarking/Decon/MERFISH/MOp3_16x58_x1.0/sc_rna.h5ad \
# /home/share/xwanaf/sour_sep/benchmarking/Decon/MERFISH/MOp3_16x58_x1.0/spatial.h5ad \
# cell_type \
# ./test

scrna_path = sys.argv[1]
spatial_path = sys.argv[2]
celltype_key = sys.argv[3]
output_path = sys.argv[4]

sc_adata = sc.read_h5ad(scrna_path)
st_adata = sc.read_h5ad(spatial_path)

# filter genes to be the same on the spatial data
intersect = np.intersect1d(sc_adata.var_names, st_adata.var_names)
st_adata = st_adata[:, intersect].copy()
sc_adata = sc_adata[:, intersect].copy()
G = len(intersect)

# let us filter some genes
G = 2000
sc.pp.filter_genes(sc_adata, min_counts=10)

sc_adata.layers["counts"] = sc_adata.X.copy()

sc.pp.highly_variable_genes(
sc_adata,
n_top_genes=G,
subset=True,
layer="counts",
flavor="seurat_v3"
)

sc.pp.normalize_total(sc_adata, target_sum=10e4)
sc.pp.log1p(sc_adata)
sc_adata.raw = sc_adata
st_adata.layers["counts"] = st_adata.X.copy()
sc.pp.normalize_total(st_adata, target_sum=10e4)
sc.pp.log1p(st_adata)
st_adata.raw = st_adata
# filter genes to be the same on the spatial data
intersect = np.intersect1d(sc_adata.var_names, st_adata.var_names)
st_adata = st_adata[:, intersect].copy()
sc_adata = sc_adata[:, intersect].copy()
G = len(intersect)
CondSCVI.setup_anndata(sc_adata, layer="counts", labels_key=celltype_key)
sc_model = CondSCVI(sc_adata, weight_obs=True)
sc_model.train(max_epochs=250,lr=0.0001)
sc_model.history["elbo_train"].plot()
DestVI.setup_anndata(st_adata, layer="counts")
st_model = DestVI.from_rna_model(st_adata, sc_model)
st_model.train(max_epochs=2500)
st_model.history["elbo_train"].plot()
st_model.get_proportions().to_csv(output_path + '/DestVI_result.txt')
53 changes: 53 additions & 0 deletions compared_methods/SPOTlight_pipeline.r
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
library(Matrix)
library(data.table)
library(Seurat)
library(dplyr)
library(SPOTlight)
library(igraph)
library(RColorBrewer)
library(SeuratDisk)
library(future)
# check the current active plan
plan("multiprocess", workers = 8)
options(future.globals.maxSize= 3*1024*1024^2)

args<-commandArgs(T)
scrna_path = args[1]
spatial_path = args[2]
celltype_final = args[3]
output_path = args[4]

sc <- LoadH5Seurat(scrna_path)
st <- LoadH5Seurat(spatial_path,meta.data=F)

set.seed(123)
sc <- Seurat::SCTransform(sc, verbose = FALSE)

Idents(sc) <- sc@meta.data[,celltype_final]

cluster_markers_all <- FindAllMarkers(object = sc,
assay = "SCT",
slot = "data",
verbose = TRUE,
only.pos = TRUE)

spotlight_ls <- spotlight_deconvolution(
se_sc = sc,
counts_spatial = st@assays$RNA@counts,
clust_vr = celltype_final, # Variable in sc_seu containing the cell-type annotation
cluster_markers = cluster_markers_all, # Dataframe with the marker genes
cl_n = 100, # number of cells per cell type to use
hvg = 3000, # Number of HVG to use
ntop = NULL, # How many of the marker genes to use (by default all)
transf = "uv", # Perform unit-variance scaling per cell and spot prior to factorzation and NLS
method = "nsNMF", # Factorization method
min_cont = 0 # Remove those cells contributing to a spot below a certain threshold
)
decon_mtrx <- spotlight_ls[[2]]
write.csv(decon_mtrx[,which(colnames(decon_mtrx) != "res_ss")], paste0(output_path, '/SPOTlight_result.txt'))

# Rscript SPOTlight_pipeline.r \
# /home/share/xwanaf/sour_sep/benchmarking/Decon/MERFISH/MOp3_16x58_x1.0/sc_rna.h5seurat \
# /home/share/xwanaf/sour_sep/benchmarking/Decon/MERFISH/MOp3_16x58_x1.0/spatial.h5seurat \
# cell_type \
# ./test
30 changes: 30 additions & 0 deletions compared_methods/STRIDE_pipeline.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
set -ex

scrna_path=$1
spatial_path=$2
celltype_key=$3
output_path=$4

prefix='STRIDE'


python /home/jxiaoae/spatial/revision/SpatialBenchmarking/Codes/Deconvolution/STRIDE/covert_data.py $scrna_path $celltype_key $spatial_path

topics=`awk '{print $2}' /home/jxiaoae/spatial/revision/SpatialBenchmarking/Codes/Deconvolution/STRIDE/scRNAseq_label.csv | sort | uniq | wc -l`

echo $topics

STRIDE deconvolve --sc-count /home/jxiaoae/spatial/revision/SpatialBenchmarking/Codes/Deconvolution/STRIDE/scRNAseq_data.csv \
--sc-celltype /home/jxiaoae/spatial/revision/SpatialBenchmarking/Codes/Deconvolution/STRIDE/scRNAseq_label.csv \
--st-count /home/jxiaoae/spatial/revision/SpatialBenchmarking/Codes/Deconvolution/STRIDE/spatial_data.csv \
--outdir $output_path --outprefix $prefix --normalize --gene-use All --st-scale-factor 300 --sc-scale-factor 300 --ntopics $topics

# ./STRIDE_pipeline.sh \
# /home/share/xwanaf/sour_sep/benchmarking/Decon/MERFISH/MOp3_16x58_x1.0/sc_rna.h5ad \
# /home/share/xwanaf/sour_sep/benchmarking/Decon/MERFISH/MOp3_16x58_x1.0/spatial.h5ad \
# cell_type \
# ./test
# /home/jxiaoae/spatial/revision/SpatialBenchmarking/Codes/Deconvolution/STRIDE_pipeline.sh \
# /home/share/xwanaf/sour_sep/benchmarking/Decon/MERFISH/MOp3_16x58_x1.0/sc_rna.h5ad \
# /home/share/xwanaf/sour_sep/revision/data/RCTDnv_MOp/260UMI/spatial.h5ad cell_type \
# /home/share/xiaojs/spatial/benchmarking/MOp_260UMI
57 changes: 57 additions & 0 deletions compared_methods/SpaOTsc_pipeline.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
# basic imports
import scanpy as sc
import pandas as pd
import numpy as np
from scipy.spatial import distance_matrix
from spaotsc import SpaOTsc
from scipy import stats
import tangram as tg
import sys
# Example
# python ~/bio/SpatialBenchmarking/Codes/Deconvolution/SpaOTsc_pipeline.py \
# /home/share/xiaojs/spatial/sour_sep/mouce_brain_VISp/Ref_scRNA_VISp_qc2.h5ad \
# /home/share/xiaojs/spatial/sour_sep/tangram/merfish/MERFISH_mop.h5ad \
# cell_subclass \
# /home/share/xwanaf/sour_sep/simulation/SpaOTsc_test


sc_file_path = sys.argv[1]
spatial_file_path = sys.argv[2]
celltype_key = sys.argv[3]
output_file_path = sys.argv[4]

ad_sc = sc.read(sc_file_path)
ad_sp = sc.read(spatial_file_path)
df_sc = ad_sc.to_df()
df_IS = ad_sp.to_df()

try:
pts = ad_sp.obs[['X','Y']].values
except:
pts = ad_sp.obs[['x','y']].values
is_dmat = distance_matrix(pts, pts)

df_is=df_IS

gene_is=df_is.columns.tolist()
gene_sc=df_sc.columns.tolist()
gene_overloap=list(set(gene_is).intersection(gene_sc))
a=df_is[gene_overloap]
b=df_sc[gene_overloap]


rho, pval = stats.spearmanr(a, b,axis=1)
rho[np.isnan(rho)]=0
mcc=rho[-(len(df_sc)):,0:len(df_is)]
C = np.exp(1-mcc)

issc = SpaOTsc.spatial_sc(sc_data=df_sc, is_data=df_is, is_dmat = is_dmat)

issc.transport_plan(C**2, alpha=0, rho=1.0, epsilon=1.0, cor_matrix=mcc, scaling=False)
gamma = issc.gamma_mapping
for j in range(gamma.shape[1]):
gamma[:,j] = gamma[:,j]/np.sum(gamma[:,j])
ad_map = sc.AnnData(gamma,obs = ad_sc.obs, var=ad_sp.obs)
tg.project_cell_annotations(ad_map, ad_sp, annotation=celltype_key)
ad_sp.obsm['tangram_ct_pred'].to_csv(output_file_path + '/SpaOTsc_decon.csv')

9 changes: 8 additions & 1 deletion compared_methods/TangramCell_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,14 @@ def f7(seq):
cell_locations = ad_sp.uns['cell_locations'].copy()



if 'x' in ad_sp.obs.columns:
ad_sp.obs['X'] = ad_sp.obs['x']
ad_sp.obs['Y'] = ad_sp.obs['y']
cell_locations['X'] = cell_locations['x']
cell_locations['Y'] = cell_locations['y']
if 'cell_nums' in ad_sp.obs.columns:
ad_sp.obs['cell_num'] = ad_sp.obs['cell_nums']
cell_locations['cell_num'] = cell_locations['cell_nums']

ad_sp.obsm['spatial'] = ad_sp.obs[['X','Y']].values
ad_sp_obsm_image_features = pd.DataFrame()
Expand Down
68 changes: 68 additions & 0 deletions compared_methods/novoSpaRc_pipeline.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
# basic imports
import scanpy as sc
import pandas as pd
import numpy as np
from scipy.spatial import distance_matrix
import novosparc as nc
from scipy import stats
import tangram as tg
from scipy.spatial.distance import cdist
import sys

# Example
# python ~/bio/SpatialBenchmarking/Codes/Deconvolution/novoSpaRc_pipeline.py \
# /home/share/xiaojs/spatial/sour_sep/mouce_brain_VISp/Ref_scRNA_VISp_qc2.h5ad \
# /home/share/xiaojs/spatial/sour_sep/tangram/merfish/MERFISH_mop.h5ad \
# cell_subclass \
# /home/share/xwanaf/sour_sep/simulation/novoSpaRc_test


sc_file_path = sys.argv[1]
spatial_file_path = sys.argv[2]
celltype_key = sys.argv[3]
output_file_path = sys.argv[4]

ad_sc = sc.read(sc_file_path)
ad_sp = sc.read(spatial_file_path)
gene_names = np.array(ad_sc.var.index.values)
dge = ad_sc.to_df().values
num_cells = dge.shape[0]
print ('number of cells and genes in the matrix:', dge.shape)

hvg = np.argsort(np.divide(np.var(dge,axis=0),np.mean(dge,axis=0)+0.0001))
dge_hvg = dge[:,hvg[-2000:]]

try:
locations = ad_sp.obs[['X','Y']].values
except:
locations = ad_sp.obs[['x','y']].values

num_locations = locations.shape[0]

p_location, p_expression = nc.rc.create_space_distributions(num_locations, num_cells)
cost_expression, cost_locations = nc.rc.setup_for_OT_reconstruction(dge_hvg,locations,num_neighbors_source = 5,num_neighbors_target = 5)

gene_is=ad_sp.var.index.tolist()
gene_sc=ad_sc.var.index.tolist()
insitu_genes=list(set(gene_is).intersection(gene_sc))

markers_in_sc = np.array([], dtype='int')
for marker in insitu_genes:
marker_index = np.where(gene_names == marker)[0]
if len(marker_index) > 0:
markers_in_sc = np.append(markers_in_sc, marker_index[0])

insitu_matrix = np.array(ad_sp.to_df()[insitu_genes])
cost_marker_genes = cdist(dge[:, markers_in_sc]/np.amax(dge[:, markers_in_sc]),insitu_matrix/np.amax(insitu_matrix))

alpha_linear = 0.5
gw = nc.rc._GWadjusted.gromov_wasserstein_adjusted_norm(cost_marker_genes, cost_expression, cost_locations,alpha_linear, p_expression, p_location,'square_loss', epsilon=5e-3, verbose=True)

gamma = gw
for j in range(gamma.shape[1]):
gamma[:,j] = gamma[:,j]/np.sum(gamma[:,j])
ad_map = sc.AnnData(gamma,obs = ad_sc.obs, var=ad_sp.obs)
tg.project_cell_annotations(ad_map, ad_sp, annotation=celltype_key)

ad_sp.obsm['tangram_ct_pred'].to_csv(output_file_path + '/novoSpaRc_decon.csv')

Loading

0 comments on commit d77ed81

Please sign in to comment.