From d77ed8151a6c7f6d0d7169f2723076ee39980473 Mon Sep 17 00:00:00 2001 From: JiaShun-Xiao Date: Wed, 21 Jun 2023 00:49:10 +0800 Subject: [PATCH] add compared methods --- compared_methods/Cell2location_pipeline.py | 2 + compared_methods/CytoSPACE_pipeline.py | 26 +++++++-- compared_methods/DSTG_pipeline.sh | 28 +++++++++ compared_methods/DestVI_pipeline.py | 66 +++++++++++++++++++++ compared_methods/SPOTlight_pipeline.r | 53 +++++++++++++++++ compared_methods/STRIDE_pipeline.sh | 30 ++++++++++ compared_methods/SpaOTsc_pipeline.py | 57 ++++++++++++++++++ compared_methods/TangramCell_pipeline.py | 9 ++- compared_methods/novoSpaRc_pipeline.py | 68 ++++++++++++++++++++++ src/Cell_Type_Identification.py | 8 +-- src/Nuclei_Segmentation.py | 16 ++--- 11 files changed, 347 insertions(+), 16 deletions(-) create mode 100755 compared_methods/DSTG_pipeline.sh create mode 100644 compared_methods/DestVI_pipeline.py create mode 100644 compared_methods/SPOTlight_pipeline.r create mode 100755 compared_methods/STRIDE_pipeline.sh create mode 100644 compared_methods/SpaOTsc_pipeline.py create mode 100644 compared_methods/novoSpaRc_pipeline.py diff --git a/compared_methods/Cell2location_pipeline.py b/compared_methods/Cell2location_pipeline.py index defc929..4d38aeb 100644 --- a/compared_methods/Cell2location_pipeline.py +++ b/compared_methods/Cell2location_pipeline.py @@ -9,6 +9,8 @@ import os +os.environ["CUDA_VISIBLE_DEVICES"] = '0' + import cell2location import scvi diff --git a/compared_methods/CytoSPACE_pipeline.py b/compared_methods/CytoSPACE_pipeline.py index 184c1ff..74e2da7 100644 --- a/compared_methods/CytoSPACE_pipeline.py +++ b/compared_methods/CytoSPACE_pipeline.py @@ -34,6 +34,18 @@ adata_st.var_names_make_unique() adata_st.obs_names_make_unique() +if adata_st.X.max()<30: + try: + adata_st.X = np.exp(adata_st.X) - 1 + except: + adata_st.X = np.exp(adata_st.X.toarray()) - 1 + +if adata_sc.X.max()<30: + try: + adata_sc.X = np.exp(adata_sc.X) - 1 + except: + adata_sc.X = np.exp(adata_sc.X.toarray()) - 1 + sel_genes = list(set(adata_st.var.index.values)&set(adata_sc.var.index.values)) adata_sc = adata_sc[:,sel_genes] @@ -64,8 +76,12 @@ sp_express_df.to_csv(sp_express_df_file_name,sep='\t') -adata_st.obs['row'] = adata_st.obs['X'] -adata_st.obs['col'] = adata_st.obs['Y'] +try: + adata_st.obs['row'] = adata_st.obs['X'] + adata_st.obs['col'] = adata_st.obs['Y'] +except: + adata_st.obs['row'] = adata_st.obs['x'] + adata_st.obs['col'] = adata_st.obs['y'] sp_coord_df = adata_st.obs[['row','col']] sp_coord_df.index.name = 'SpotID' sp_coord_df_file = tempfile.NamedTemporaryFile(suffix='.txt') @@ -100,7 +116,8 @@ df['cell_type_cytospace'] = df['cytospace_result'].apply(lambda x:x.split(':')[0]) df['cell_index_cytospace'] = df['cytospace_result'].apply(lambda x:x.split(':')[1]) -df.to_csv('/'.join(output_file_path.split('/')[:-1]) + '/CytoSPACE_SinglecellMapping_result.txt') +#df.to_csv('/'.join(output_file_path.split('/')[:-1]) + '/CytoSPACE_SinglecellMapping_result.txt') +df.to_csv(output_file_path + '/CytoSPACE_SinglecellMapping_result.txt') ss_res = df.copy() ss_res = pd.pivot_table(ss_res[['spot_index','cell_type_cytospace']],index=['spot_index'],columns=['cell_type_cytospace'], aggfunc=len, fill_value=0).reset_index() @@ -108,4 +125,5 @@ ss_res = pd.DataFrame(ss_res.values/ss_res.values.sum(1)[:,None],columns=ss_res.columns,index=ss_res.index) ss_res = adata_st.obs[[]].merge(ss_res,left_index=True,right_index=True,how='left') ss_res = ss_res.fillna(0) -ss_res.to_csv('/'.join(output_file_path.split('/')[:-1]) + '/CytoSPACE_result.txt') \ No newline at end of file +#ss_res.to_csv('/'.join(output_file_path.split('/')[:-1]) + '/CytoSPACE_result.txt') +ss_res.to_csv(output_file_path + '/CytoSPACE_result.txt') \ No newline at end of file diff --git a/compared_methods/DSTG_pipeline.sh b/compared_methods/DSTG_pipeline.sh new file mode 100755 index 0000000..b9fc878 --- /dev/null +++ b/compared_methods/DSTG_pipeline.sh @@ -0,0 +1,28 @@ +set -ex + +scrna_path=$1 +spatial_path=$2 +celltype_key=$3 +output_path=$4 + + +echo $scrna_path +echo $spatial_path +echo $celltype_key + +cd /home/jxiaoae/spatial/revision/SpatialBenchmarking/Codes/Deconvolution/DSTG/DSTG + +Rscript convert_data_h5ad.r $scrna_path $celltype_key $spatial_path + +Rscript convert_data.R scRNAseq_data.RDS spatial_data.RDS scRNAseq_label.RDS + +python train.py + +cp ./DSTG_Result/predict_output.csv $output_path/DSTG_output.csv + +# ./DSTG_pipeline.sh \ +# /home/share/xwanaf/sour_sep/benchmarking/Decon/MERFISH/MOp3_16x58_x1.0/sc_rna.h5seurat \ +# /home/share/xwanaf/sour_sep/benchmarking/Decon/MERFISH/MOp3_16x58_x1.0/spatial.h5seurat \ +# cell_type + +# ~/spatial/SpatialScope/compared_methods/DSTG/DSTG/DSTG_Result/predict_output.csv diff --git a/compared_methods/DestVI_pipeline.py b/compared_methods/DestVI_pipeline.py new file mode 100644 index 0000000..e8b8ffa --- /dev/null +++ b/compared_methods/DestVI_pipeline.py @@ -0,0 +1,66 @@ +import scanpy as sc +import numpy as np +import pandas as pd + +import scvi +from scvi.model import CondSCVI, DestVI + +import sys +import os +os.environ["CUDA_VISIBLE_DEVICES"] = '0' + +# python DestVI_pipeline.py \ +# /home/share/xwanaf/sour_sep/benchmarking/Decon/MERFISH/MOp3_16x58_x1.0/sc_rna.h5ad \ +# /home/share/xwanaf/sour_sep/benchmarking/Decon/MERFISH/MOp3_16x58_x1.0/spatial.h5ad \ +# cell_type \ +# ./test + +scrna_path = sys.argv[1] +spatial_path = sys.argv[2] +celltype_key = sys.argv[3] +output_path = sys.argv[4] + +sc_adata = sc.read_h5ad(scrna_path) +st_adata = sc.read_h5ad(spatial_path) + +# filter genes to be the same on the spatial data +intersect = np.intersect1d(sc_adata.var_names, st_adata.var_names) +st_adata = st_adata[:, intersect].copy() +sc_adata = sc_adata[:, intersect].copy() +G = len(intersect) + +# let us filter some genes +G = 2000 +sc.pp.filter_genes(sc_adata, min_counts=10) + +sc_adata.layers["counts"] = sc_adata.X.copy() + +sc.pp.highly_variable_genes( + sc_adata, + n_top_genes=G, + subset=True, + layer="counts", + flavor="seurat_v3" +) + +sc.pp.normalize_total(sc_adata, target_sum=10e4) +sc.pp.log1p(sc_adata) +sc_adata.raw = sc_adata +st_adata.layers["counts"] = st_adata.X.copy() +sc.pp.normalize_total(st_adata, target_sum=10e4) +sc.pp.log1p(st_adata) +st_adata.raw = st_adata +# filter genes to be the same on the spatial data +intersect = np.intersect1d(sc_adata.var_names, st_adata.var_names) +st_adata = st_adata[:, intersect].copy() +sc_adata = sc_adata[:, intersect].copy() +G = len(intersect) +CondSCVI.setup_anndata(sc_adata, layer="counts", labels_key=celltype_key) +sc_model = CondSCVI(sc_adata, weight_obs=True) +sc_model.train(max_epochs=250,lr=0.0001) +sc_model.history["elbo_train"].plot() +DestVI.setup_anndata(st_adata, layer="counts") +st_model = DestVI.from_rna_model(st_adata, sc_model) +st_model.train(max_epochs=2500) +st_model.history["elbo_train"].plot() +st_model.get_proportions().to_csv(output_path + '/DestVI_result.txt') diff --git a/compared_methods/SPOTlight_pipeline.r b/compared_methods/SPOTlight_pipeline.r new file mode 100644 index 0000000..bfb0c39 --- /dev/null +++ b/compared_methods/SPOTlight_pipeline.r @@ -0,0 +1,53 @@ +library(Matrix) +library(data.table) +library(Seurat) +library(dplyr) +library(SPOTlight) +library(igraph) +library(RColorBrewer) +library(SeuratDisk) +library(future) +# check the current active plan +plan("multiprocess", workers = 8) +options(future.globals.maxSize= 3*1024*1024^2) + +args<-commandArgs(T) +scrna_path = args[1] +spatial_path = args[2] +celltype_final = args[3] +output_path = args[4] + +sc <- LoadH5Seurat(scrna_path) +st <- LoadH5Seurat(spatial_path,meta.data=F) + +set.seed(123) +sc <- Seurat::SCTransform(sc, verbose = FALSE) + +Idents(sc) <- sc@meta.data[,celltype_final] + +cluster_markers_all <- FindAllMarkers(object = sc, + assay = "SCT", + slot = "data", + verbose = TRUE, + only.pos = TRUE) + +spotlight_ls <- spotlight_deconvolution( + se_sc = sc, + counts_spatial = st@assays$RNA@counts, + clust_vr = celltype_final, # Variable in sc_seu containing the cell-type annotation + cluster_markers = cluster_markers_all, # Dataframe with the marker genes + cl_n = 100, # number of cells per cell type to use + hvg = 3000, # Number of HVG to use + ntop = NULL, # How many of the marker genes to use (by default all) + transf = "uv", # Perform unit-variance scaling per cell and spot prior to factorzation and NLS + method = "nsNMF", # Factorization method + min_cont = 0 # Remove those cells contributing to a spot below a certain threshold + ) +decon_mtrx <- spotlight_ls[[2]] +write.csv(decon_mtrx[,which(colnames(decon_mtrx) != "res_ss")], paste0(output_path, '/SPOTlight_result.txt')) + +# Rscript SPOTlight_pipeline.r \ +# /home/share/xwanaf/sour_sep/benchmarking/Decon/MERFISH/MOp3_16x58_x1.0/sc_rna.h5seurat \ +# /home/share/xwanaf/sour_sep/benchmarking/Decon/MERFISH/MOp3_16x58_x1.0/spatial.h5seurat \ +# cell_type \ +# ./test \ No newline at end of file diff --git a/compared_methods/STRIDE_pipeline.sh b/compared_methods/STRIDE_pipeline.sh new file mode 100755 index 0000000..9e0e0ed --- /dev/null +++ b/compared_methods/STRIDE_pipeline.sh @@ -0,0 +1,30 @@ +set -ex + +scrna_path=$1 +spatial_path=$2 +celltype_key=$3 +output_path=$4 + +prefix='STRIDE' + + +python /home/jxiaoae/spatial/revision/SpatialBenchmarking/Codes/Deconvolution/STRIDE/covert_data.py $scrna_path $celltype_key $spatial_path + +topics=`awk '{print $2}' /home/jxiaoae/spatial/revision/SpatialBenchmarking/Codes/Deconvolution/STRIDE/scRNAseq_label.csv | sort | uniq | wc -l` + +echo $topics + +STRIDE deconvolve --sc-count /home/jxiaoae/spatial/revision/SpatialBenchmarking/Codes/Deconvolution/STRIDE/scRNAseq_data.csv \ +--sc-celltype /home/jxiaoae/spatial/revision/SpatialBenchmarking/Codes/Deconvolution/STRIDE/scRNAseq_label.csv \ +--st-count /home/jxiaoae/spatial/revision/SpatialBenchmarking/Codes/Deconvolution/STRIDE/spatial_data.csv \ +--outdir $output_path --outprefix $prefix --normalize --gene-use All --st-scale-factor 300 --sc-scale-factor 300 --ntopics $topics + +# ./STRIDE_pipeline.sh \ +# /home/share/xwanaf/sour_sep/benchmarking/Decon/MERFISH/MOp3_16x58_x1.0/sc_rna.h5ad \ +# /home/share/xwanaf/sour_sep/benchmarking/Decon/MERFISH/MOp3_16x58_x1.0/spatial.h5ad \ +# cell_type \ +# ./test +# /home/jxiaoae/spatial/revision/SpatialBenchmarking/Codes/Deconvolution/STRIDE_pipeline.sh \ +# /home/share/xwanaf/sour_sep/benchmarking/Decon/MERFISH/MOp3_16x58_x1.0/sc_rna.h5ad \ +# /home/share/xwanaf/sour_sep/revision/data/RCTDnv_MOp/260UMI/spatial.h5ad cell_type \ +# /home/share/xiaojs/spatial/benchmarking/MOp_260UMI \ No newline at end of file diff --git a/compared_methods/SpaOTsc_pipeline.py b/compared_methods/SpaOTsc_pipeline.py new file mode 100644 index 0000000..eb8093d --- /dev/null +++ b/compared_methods/SpaOTsc_pipeline.py @@ -0,0 +1,57 @@ +# basic imports +import scanpy as sc +import pandas as pd +import numpy as np +from scipy.spatial import distance_matrix +from spaotsc import SpaOTsc +from scipy import stats +import tangram as tg +import sys +# Example +# python ~/bio/SpatialBenchmarking/Codes/Deconvolution/SpaOTsc_pipeline.py \ +# /home/share/xiaojs/spatial/sour_sep/mouce_brain_VISp/Ref_scRNA_VISp_qc2.h5ad \ +# /home/share/xiaojs/spatial/sour_sep/tangram/merfish/MERFISH_mop.h5ad \ +# cell_subclass \ +# /home/share/xwanaf/sour_sep/simulation/SpaOTsc_test + + +sc_file_path = sys.argv[1] +spatial_file_path = sys.argv[2] +celltype_key = sys.argv[3] +output_file_path = sys.argv[4] + +ad_sc = sc.read(sc_file_path) +ad_sp = sc.read(spatial_file_path) +df_sc = ad_sc.to_df() +df_IS = ad_sp.to_df() + +try: + pts = ad_sp.obs[['X','Y']].values +except: + pts = ad_sp.obs[['x','y']].values +is_dmat = distance_matrix(pts, pts) + +df_is=df_IS + +gene_is=df_is.columns.tolist() +gene_sc=df_sc.columns.tolist() +gene_overloap=list(set(gene_is).intersection(gene_sc)) +a=df_is[gene_overloap] +b=df_sc[gene_overloap] + + +rho, pval = stats.spearmanr(a, b,axis=1) +rho[np.isnan(rho)]=0 +mcc=rho[-(len(df_sc)):,0:len(df_is)] +C = np.exp(1-mcc) + +issc = SpaOTsc.spatial_sc(sc_data=df_sc, is_data=df_is, is_dmat = is_dmat) + +issc.transport_plan(C**2, alpha=0, rho=1.0, epsilon=1.0, cor_matrix=mcc, scaling=False) +gamma = issc.gamma_mapping +for j in range(gamma.shape[1]): + gamma[:,j] = gamma[:,j]/np.sum(gamma[:,j]) +ad_map = sc.AnnData(gamma,obs = ad_sc.obs, var=ad_sp.obs) +tg.project_cell_annotations(ad_map, ad_sp, annotation=celltype_key) +ad_sp.obsm['tangram_ct_pred'].to_csv(output_file_path + '/SpaOTsc_decon.csv') + diff --git a/compared_methods/TangramCell_pipeline.py b/compared_methods/TangramCell_pipeline.py index 2d1173e..e6274cd 100644 --- a/compared_methods/TangramCell_pipeline.py +++ b/compared_methods/TangramCell_pipeline.py @@ -82,7 +82,14 @@ def f7(seq): cell_locations = ad_sp.uns['cell_locations'].copy() - +if 'x' in ad_sp.obs.columns: + ad_sp.obs['X'] = ad_sp.obs['x'] + ad_sp.obs['Y'] = ad_sp.obs['y'] + cell_locations['X'] = cell_locations['x'] + cell_locations['Y'] = cell_locations['y'] +if 'cell_nums' in ad_sp.obs.columns: + ad_sp.obs['cell_num'] = ad_sp.obs['cell_nums'] + cell_locations['cell_num'] = cell_locations['cell_nums'] ad_sp.obsm['spatial'] = ad_sp.obs[['X','Y']].values ad_sp_obsm_image_features = pd.DataFrame() diff --git a/compared_methods/novoSpaRc_pipeline.py b/compared_methods/novoSpaRc_pipeline.py new file mode 100644 index 0000000..5be6627 --- /dev/null +++ b/compared_methods/novoSpaRc_pipeline.py @@ -0,0 +1,68 @@ +# basic imports +import scanpy as sc +import pandas as pd +import numpy as np +from scipy.spatial import distance_matrix +import novosparc as nc +from scipy import stats +import tangram as tg +from scipy.spatial.distance import cdist +import sys + +# Example +# python ~/bio/SpatialBenchmarking/Codes/Deconvolution/novoSpaRc_pipeline.py \ +# /home/share/xiaojs/spatial/sour_sep/mouce_brain_VISp/Ref_scRNA_VISp_qc2.h5ad \ +# /home/share/xiaojs/spatial/sour_sep/tangram/merfish/MERFISH_mop.h5ad \ +# cell_subclass \ +# /home/share/xwanaf/sour_sep/simulation/novoSpaRc_test + + +sc_file_path = sys.argv[1] +spatial_file_path = sys.argv[2] +celltype_key = sys.argv[3] +output_file_path = sys.argv[4] + +ad_sc = sc.read(sc_file_path) +ad_sp = sc.read(spatial_file_path) +gene_names = np.array(ad_sc.var.index.values) +dge = ad_sc.to_df().values +num_cells = dge.shape[0] +print ('number of cells and genes in the matrix:', dge.shape) + +hvg = np.argsort(np.divide(np.var(dge,axis=0),np.mean(dge,axis=0)+0.0001)) +dge_hvg = dge[:,hvg[-2000:]] + +try: + locations = ad_sp.obs[['X','Y']].values +except: + locations = ad_sp.obs[['x','y']].values + +num_locations = locations.shape[0] + +p_location, p_expression = nc.rc.create_space_distributions(num_locations, num_cells) +cost_expression, cost_locations = nc.rc.setup_for_OT_reconstruction(dge_hvg,locations,num_neighbors_source = 5,num_neighbors_target = 5) + +gene_is=ad_sp.var.index.tolist() +gene_sc=ad_sc.var.index.tolist() +insitu_genes=list(set(gene_is).intersection(gene_sc)) + +markers_in_sc = np.array([], dtype='int') +for marker in insitu_genes: + marker_index = np.where(gene_names == marker)[0] + if len(marker_index) > 0: + markers_in_sc = np.append(markers_in_sc, marker_index[0]) + +insitu_matrix = np.array(ad_sp.to_df()[insitu_genes]) +cost_marker_genes = cdist(dge[:, markers_in_sc]/np.amax(dge[:, markers_in_sc]),insitu_matrix/np.amax(insitu_matrix)) + +alpha_linear = 0.5 +gw = nc.rc._GWadjusted.gromov_wasserstein_adjusted_norm(cost_marker_genes, cost_expression, cost_locations,alpha_linear, p_expression, p_location,'square_loss', epsilon=5e-3, verbose=True) + +gamma = gw +for j in range(gamma.shape[1]): + gamma[:,j] = gamma[:,j]/np.sum(gamma[:,j]) +ad_map = sc.AnnData(gamma,obs = ad_sc.obs, var=ad_sp.obs) +tg.project_cell_annotations(ad_map, ad_sp, annotation=celltype_key) + +ad_sp.obsm['tangram_ct_pred'].to_csv(output_file_path + '/novoSpaRc_decon.csv') + diff --git a/src/Cell_Type_Identification.py b/src/Cell_Type_Identification.py index 55e6566..f3d0732 100644 --- a/src/Cell_Type_Identification.py +++ b/src/Cell_Type_Identification.py @@ -110,13 +110,13 @@ def WarmStart(self,hs_ST,UMI_min_sigma = 300): else: self.loggings.error('Wrong spatial location shape: {}'.format(self.sp_adata.obsm['spatial'].shape)) sys.exit() - + UMI_min = min(UMI_min,UMI_min_sigma) nUMI = pd.DataFrame(np.array(self.sp_adata.X.sum(-1)), index = self.sp_adata.obs.index) puck = SpatialRNA(coords, counts, nUMI) counts = self.sc_adata.to_df().T cell_types = pd.DataFrame(self.sc_adata.obs[self.cell_class_column]) nUMI = pd.DataFrame(self.sc_adata.to_df().T.sum(0)) - reference = Reference(counts, cell_types, nUMI) + reference = Reference(counts, cell_types, nUMI, loggings=self.loggings) myRCTD = create_RCTD(puck, reference, max_cores = 22, UMI_min=UMI_min, UMI_min_sigma = UMI_min_sigma, loggings = self.loggings) myRCTD = run_RCTD(myRCTD, self.Q_mat_all, self.X_vals_loc, loggings = self.loggings) self.InitProp = myRCTD @@ -170,7 +170,7 @@ def CellTypeIdentification(self, nu = 10, n_neighbo = 10, hs_ST = False, VisiumC with open(os.path.join(self.out_dir, 'InitProp.pickle'), 'rb') as handle: self.InitProp = pickle.load(handle) self.LoadLikelihoodTable() - + cell_locations = cell_locations.loc[cell_locations['spot_index'].isin(self.InitProp['results'].index.values)] CellTypeLabel = SingleCellTypeIdentification(self.InitProp, cell_locations, 'spot_index', self.Q_mat_all, self.X_vals_loc, nu = nu, n_epoch = 8, n_neighbo = n_neighbo, loggings = self.loggings, hs_ST = hs_ST) label2ct = CellTypeLabel['label2ct'] discrete_label = CellTypeLabel['discrete_label'].copy() @@ -217,7 +217,7 @@ def CellTypeIdentification(self, nu = 10, n_neighbo = 10, hs_ST = False, VisiumC counts = self.sc_adata.to_df().T cell_types = pd.DataFrame(self.sc_adata.obs[self.cell_class_column]) nUMI = pd.DataFrame(self.sc_adata.to_df().T.sum(0)) - reference = Reference(counts, cell_types, nUMI) + reference = Reference(counts, cell_types, nUMI, loggings=self.loggings) estimated_be = calculate_batch_effect(puck, reference, add_genes, max_cores = 22, loggings = self.loggings) sp_data = self.sp_adata_be.copy() diff --git a/src/Nuclei_Segmentation.py b/src/Nuclei_Segmentation.py index 7b2ca31..7a4e0c9 100644 --- a/src/Nuclei_Segmentation.py +++ b/src/Nuclei_Segmentation.py @@ -16,13 +16,14 @@ class SpatialScopeNS: - def __init__(self,tissue,out_dir,ST_Data,Img_Data,prob_thresh,max_cell_number): + def __init__(self,tissue,out_dir,ST_Data,Img_Data,prob_thresh,max_cell_number,min_counts): self.tissue = tissue self.out_dir = out_dir self.ST_Data = ST_Data self.Img_Data = Img_Data self.prob_thresh = prob_thresh self.max_cell_number = max_cell_number + self.min_counts = min_counts if not os.path.exists(out_dir): os.mkdir(out_dir) if not os.path.exists(os.path.join(out_dir,tissue)): @@ -31,16 +32,16 @@ def __init__(self,tissue,out_dir,ST_Data,Img_Data,prob_thresh,max_cell_number): self.out_dir = os.path.join(out_dir,tissue) loggings = configure_logging(os.path.join(self.out_dir,'logs')) self.loggings = loggings - self.LoadData(self.ST_Data, self.Img_Data) + self.LoadData(self.ST_Data, self.Img_Data, self.min_counts) - def LoadData(self, ST_Data, Img_Data): + def LoadData(self, ST_Data, Img_Data, min_counts): self.loggings.info(f'Reading spatial data: {ST_Data}') sp_adata = anndata.read_h5ad(ST_Data) sp_adata.obs_names_make_unique() sp_adata.var_names_make_unique() self.loggings.info(f'Spatial data shape: {sp_adata.shape}') - sc.pp.filter_cells(sp_adata, min_counts=1000) + sc.pp.filter_cells(sp_adata, min_counts=min_counts) sc.pp.filter_cells(sp_adata, max_counts=20000) self.loggings.info(f'Spatial data shape after QC: {sp_adata.shape}') self.sp_adata = sp_adata @@ -119,7 +120,7 @@ def NucleiSegmentation(self): self.sp_adata.obs["cell_count"] = self.sp_adata.obsm["image_features"]["segmentation_label"].astype(int) - fig, axes = plt.subplots(1, 3,figsize=(30,9),dpi=100) + fig, axes = plt.subplots(1, 3,figsize=(30,9),dpi=250) self.image.show("image", ax=axes[0]) _ = axes[0].set_title("H&E") self.image.show("segmented_stardist_default", cmap="jet", interpolation="none", ax=axes[1]) @@ -164,11 +165,12 @@ def NucleiSegmentation(self): parser.add_argument('--out_dir', type=str, help='output path', default=None) parser.add_argument('--ST_Data', type=str, help='ST data path', default=None) parser.add_argument('--Img_Data', type=str, help='H&E stained image data path', default=None) - parser.add_argument('--prob_thresh', type=float, help='object probability threshold', default=0.5) + parser.add_argument('--prob_thresh', type=float, help='object probability threshol, decrease this parameter if too many nucleus are missing', default=0.5) parser.add_argument('--max_cell_number', type=int, help='maximum cell number per spot', default=20) + parser.add_argument('--min_counts', type=int, help='minimum UMI count per spot', default=500) args = parser.parse_args() - NS = SpatialScopeNS(args.tissue, args.out_dir, args.ST_Data, args.Img_Data, args.prob_thresh, args.max_cell_number) + NS = SpatialScopeNS(args.tissue, args.out_dir, args.ST_Data, args.Img_Data, args.prob_thresh, args.max_cell_number, args.min_counts) NS.NucleiSegmentation()