From d77ed8151a6c7f6d0d7169f2723076ee39980473 Mon Sep 17 00:00:00 2001
From: JiaShun-Xiao <jxiaoae@connect.ust.hk>
Date: Wed, 21 Jun 2023 00:49:10 +0800
Subject: [PATCH] add compared methods

---
 compared_methods/Cell2location_pipeline.py |  2 +
 compared_methods/CytoSPACE_pipeline.py     | 26 +++++++--
 compared_methods/DSTG_pipeline.sh          | 28 +++++++++
 compared_methods/DestVI_pipeline.py        | 66 +++++++++++++++++++++
 compared_methods/SPOTlight_pipeline.r      | 53 +++++++++++++++++
 compared_methods/STRIDE_pipeline.sh        | 30 ++++++++++
 compared_methods/SpaOTsc_pipeline.py       | 57 ++++++++++++++++++
 compared_methods/TangramCell_pipeline.py   |  9 ++-
 compared_methods/novoSpaRc_pipeline.py     | 68 ++++++++++++++++++++++
 src/Cell_Type_Identification.py            |  8 +--
 src/Nuclei_Segmentation.py                 | 16 ++---
 11 files changed, 347 insertions(+), 16 deletions(-)
 create mode 100755 compared_methods/DSTG_pipeline.sh
 create mode 100644 compared_methods/DestVI_pipeline.py
 create mode 100644 compared_methods/SPOTlight_pipeline.r
 create mode 100755 compared_methods/STRIDE_pipeline.sh
 create mode 100644 compared_methods/SpaOTsc_pipeline.py
 create mode 100644 compared_methods/novoSpaRc_pipeline.py

diff --git a/compared_methods/Cell2location_pipeline.py b/compared_methods/Cell2location_pipeline.py
index defc929..4d38aeb 100644
--- a/compared_methods/Cell2location_pipeline.py
+++ b/compared_methods/Cell2location_pipeline.py
@@ -9,6 +9,8 @@
 
 import os
 
+os.environ["CUDA_VISIBLE_DEVICES"] = '0'
+
 import cell2location
 import scvi
 
diff --git a/compared_methods/CytoSPACE_pipeline.py b/compared_methods/CytoSPACE_pipeline.py
index 184c1ff..74e2da7 100644
--- a/compared_methods/CytoSPACE_pipeline.py
+++ b/compared_methods/CytoSPACE_pipeline.py
@@ -34,6 +34,18 @@
 adata_st.var_names_make_unique()
 adata_st.obs_names_make_unique()
 
+if adata_st.X.max()<30:
+    try:
+        adata_st.X = np.exp(adata_st.X) - 1
+    except:
+        adata_st.X = np.exp(adata_st.X.toarray()) - 1
+        
+if adata_sc.X.max()<30:
+    try:
+        adata_sc.X = np.exp(adata_sc.X) - 1
+    except:
+        adata_sc.X = np.exp(adata_sc.X.toarray()) - 1
+
 sel_genes = list(set(adata_st.var.index.values)&set(adata_sc.var.index.values))
 
 adata_sc = adata_sc[:,sel_genes]
@@ -64,8 +76,12 @@
 sp_express_df.to_csv(sp_express_df_file_name,sep='\t')
 
 
-adata_st.obs['row'] = adata_st.obs['X']
-adata_st.obs['col'] = adata_st.obs['Y']
+try:
+    adata_st.obs['row'] = adata_st.obs['X']
+    adata_st.obs['col'] = adata_st.obs['Y']
+except:
+    adata_st.obs['row'] = adata_st.obs['x']
+    adata_st.obs['col'] = adata_st.obs['y']
 sp_coord_df = adata_st.obs[['row','col']]
 sp_coord_df.index.name = 'SpotID'
 sp_coord_df_file = tempfile.NamedTemporaryFile(suffix='.txt')
@@ -100,7 +116,8 @@
 df['cell_type_cytospace'] = df['cytospace_result'].apply(lambda x:x.split(':')[0])
 df['cell_index_cytospace'] = df['cytospace_result'].apply(lambda x:x.split(':')[1])
 
-df.to_csv('/'.join(output_file_path.split('/')[:-1]) + '/CytoSPACE_SinglecellMapping_result.txt')
+#df.to_csv('/'.join(output_file_path.split('/')[:-1]) + '/CytoSPACE_SinglecellMapping_result.txt')
+df.to_csv(output_file_path + '/CytoSPACE_SinglecellMapping_result.txt')
 
 ss_res = df.copy()
 ss_res = pd.pivot_table(ss_res[['spot_index','cell_type_cytospace']],index=['spot_index'],columns=['cell_type_cytospace'], aggfunc=len, fill_value=0).reset_index()
@@ -108,4 +125,5 @@
 ss_res = pd.DataFrame(ss_res.values/ss_res.values.sum(1)[:,None],columns=ss_res.columns,index=ss_res.index)
 ss_res = adata_st.obs[[]].merge(ss_res,left_index=True,right_index=True,how='left')
 ss_res = ss_res.fillna(0)
-ss_res.to_csv('/'.join(output_file_path.split('/')[:-1]) + '/CytoSPACE_result.txt')
\ No newline at end of file
+#ss_res.to_csv('/'.join(output_file_path.split('/')[:-1]) + '/CytoSPACE_result.txt')
+ss_res.to_csv(output_file_path + '/CytoSPACE_result.txt')
\ No newline at end of file
diff --git a/compared_methods/DSTG_pipeline.sh b/compared_methods/DSTG_pipeline.sh
new file mode 100755
index 0000000..b9fc878
--- /dev/null
+++ b/compared_methods/DSTG_pipeline.sh
@@ -0,0 +1,28 @@
+set -ex
+
+scrna_path=$1
+spatial_path=$2
+celltype_key=$3
+output_path=$4
+
+
+echo $scrna_path
+echo $spatial_path
+echo $celltype_key
+
+cd /home/jxiaoae/spatial/revision/SpatialBenchmarking/Codes/Deconvolution/DSTG/DSTG
+
+Rscript convert_data_h5ad.r $scrna_path $celltype_key $spatial_path
+
+Rscript convert_data.R scRNAseq_data.RDS  spatial_data.RDS  scRNAseq_label.RDS 
+
+python  train.py
+
+cp ./DSTG_Result/predict_output.csv $output_path/DSTG_output.csv
+
+# ./DSTG_pipeline.sh \
+# /home/share/xwanaf/sour_sep/benchmarking/Decon/MERFISH/MOp3_16x58_x1.0/sc_rna.h5seurat \
+# /home/share/xwanaf/sour_sep/benchmarking/Decon/MERFISH/MOp3_16x58_x1.0/spatial.h5seurat \
+# cell_type
+
+# ~/spatial/SpatialScope/compared_methods/DSTG/DSTG/DSTG_Result/predict_output.csv
diff --git a/compared_methods/DestVI_pipeline.py b/compared_methods/DestVI_pipeline.py
new file mode 100644
index 0000000..e8b8ffa
--- /dev/null
+++ b/compared_methods/DestVI_pipeline.py
@@ -0,0 +1,66 @@
+import scanpy as sc
+import numpy as np
+import pandas as pd
+
+import scvi
+from scvi.model import CondSCVI, DestVI
+
+import sys
+import os
+os.environ["CUDA_VISIBLE_DEVICES"] = '0'
+
+# python DestVI_pipeline.py \
+# /home/share/xwanaf/sour_sep/benchmarking/Decon/MERFISH/MOp3_16x58_x1.0/sc_rna.h5ad \
+# /home/share/xwanaf/sour_sep/benchmarking/Decon/MERFISH/MOp3_16x58_x1.0/spatial.h5ad \
+# cell_type \
+# ./test
+
+scrna_path = sys.argv[1]
+spatial_path = sys.argv[2]
+celltype_key = sys.argv[3]
+output_path = sys.argv[4]
+
+sc_adata = sc.read_h5ad(scrna_path)
+st_adata = sc.read_h5ad(spatial_path)
+
+# filter genes to be the same on the spatial data
+intersect = np.intersect1d(sc_adata.var_names, st_adata.var_names)
+st_adata = st_adata[:, intersect].copy()
+sc_adata = sc_adata[:, intersect].copy()
+G = len(intersect)
+
+# let us filter some genes
+G = 2000
+sc.pp.filter_genes(sc_adata, min_counts=10)
+
+sc_adata.layers["counts"] = sc_adata.X.copy()
+
+sc.pp.highly_variable_genes(
+    sc_adata,
+    n_top_genes=G,
+    subset=True,
+    layer="counts",
+    flavor="seurat_v3"
+)
+
+sc.pp.normalize_total(sc_adata, target_sum=10e4)
+sc.pp.log1p(sc_adata)
+sc_adata.raw = sc_adata
+st_adata.layers["counts"] = st_adata.X.copy()
+sc.pp.normalize_total(st_adata, target_sum=10e4)
+sc.pp.log1p(st_adata)
+st_adata.raw = st_adata
+# filter genes to be the same on the spatial data
+intersect = np.intersect1d(sc_adata.var_names, st_adata.var_names)
+st_adata = st_adata[:, intersect].copy()
+sc_adata = sc_adata[:, intersect].copy()
+G = len(intersect)
+CondSCVI.setup_anndata(sc_adata, layer="counts", labels_key=celltype_key)
+sc_model = CondSCVI(sc_adata, weight_obs=True)
+sc_model.train(max_epochs=250,lr=0.0001)
+sc_model.history["elbo_train"].plot()
+DestVI.setup_anndata(st_adata, layer="counts")
+st_model = DestVI.from_rna_model(st_adata, sc_model)
+st_model.train(max_epochs=2500)
+st_model.history["elbo_train"].plot()
+st_model.get_proportions().to_csv(output_path + '/DestVI_result.txt')
diff --git a/compared_methods/SPOTlight_pipeline.r b/compared_methods/SPOTlight_pipeline.r
new file mode 100644
index 0000000..bfb0c39
--- /dev/null
+++ b/compared_methods/SPOTlight_pipeline.r
@@ -0,0 +1,53 @@
+library(Matrix)
+library(data.table)
+library(Seurat)
+library(dplyr)
+library(SPOTlight)
+library(igraph)
+library(RColorBrewer)
+library(SeuratDisk)
+library(future)
+# check the current active plan
+plan("multiprocess", workers = 8)
+options(future.globals.maxSize= 3*1024*1024^2)
+
+args<-commandArgs(T)
+scrna_path = args[1]
+spatial_path = args[2]
+celltype_final = args[3]
+output_path = args[4]
+
+sc <- LoadH5Seurat(scrna_path)
+st <- LoadH5Seurat(spatial_path,meta.data=F)
+
+set.seed(123)
+sc <- Seurat::SCTransform(sc, verbose = FALSE)
+
+Idents(sc) <- sc@meta.data[,celltype_final]
+
+cluster_markers_all <- FindAllMarkers(object = sc, 
+                                              assay = "SCT",
+                                              slot = "data",
+                                              verbose = TRUE, 
+                                              only.pos = TRUE)
+
+spotlight_ls <- spotlight_deconvolution(
+  se_sc = sc,
+  counts_spatial = st@assays$RNA@counts,
+  clust_vr = celltype_final, # Variable in sc_seu containing the cell-type annotation
+  cluster_markers = cluster_markers_all, # Dataframe with the marker genes
+  cl_n = 100, # number of cells per cell type to use
+  hvg = 3000, # Number of HVG to use
+  ntop = NULL, # How many of the marker genes to use (by default all)
+  transf = "uv", # Perform unit-variance scaling per cell and spot prior to factorzation and NLS
+  method = "nsNMF", # Factorization method
+  min_cont = 0 # Remove those cells contributing to a spot below a certain threshold 
+  )
+decon_mtrx <- spotlight_ls[[2]]
+write.csv(decon_mtrx[,which(colnames(decon_mtrx) != "res_ss")], paste0(output_path, '/SPOTlight_result.txt'))
+
+# Rscript SPOTlight_pipeline.r \
+# /home/share/xwanaf/sour_sep/benchmarking/Decon/MERFISH/MOp3_16x58_x1.0/sc_rna.h5seurat \
+# /home/share/xwanaf/sour_sep/benchmarking/Decon/MERFISH/MOp3_16x58_x1.0/spatial.h5seurat \
+# cell_type \
+# ./test
\ No newline at end of file
diff --git a/compared_methods/STRIDE_pipeline.sh b/compared_methods/STRIDE_pipeline.sh
new file mode 100755
index 0000000..9e0e0ed
--- /dev/null
+++ b/compared_methods/STRIDE_pipeline.sh
@@ -0,0 +1,30 @@
+set -ex
+
+scrna_path=$1
+spatial_path=$2
+celltype_key=$3
+output_path=$4
+
+prefix='STRIDE'
+
+
+python  /home/jxiaoae/spatial/revision/SpatialBenchmarking/Codes/Deconvolution/STRIDE/covert_data.py  $scrna_path $celltype_key $spatial_path
+
+topics=`awk '{print $2}' /home/jxiaoae/spatial/revision/SpatialBenchmarking/Codes/Deconvolution/STRIDE/scRNAseq_label.csv | sort | uniq  | wc -l`
+
+echo $topics
+
+STRIDE deconvolve --sc-count /home/jxiaoae/spatial/revision/SpatialBenchmarking/Codes/Deconvolution/STRIDE/scRNAseq_data.csv \
+--sc-celltype /home/jxiaoae/spatial/revision/SpatialBenchmarking/Codes/Deconvolution/STRIDE/scRNAseq_label.csv \
+--st-count /home/jxiaoae/spatial/revision/SpatialBenchmarking/Codes/Deconvolution/STRIDE/spatial_data.csv \
+--outdir $output_path --outprefix $prefix --normalize --gene-use All --st-scale-factor 300 --sc-scale-factor 300 --ntopics $topics
+
+# ./STRIDE_pipeline.sh \
+# /home/share/xwanaf/sour_sep/benchmarking/Decon/MERFISH/MOp3_16x58_x1.0/sc_rna.h5ad \
+# /home/share/xwanaf/sour_sep/benchmarking/Decon/MERFISH/MOp3_16x58_x1.0/spatial.h5ad \
+# cell_type \
+# ./test
+# /home/jxiaoae/spatial/revision/SpatialBenchmarking/Codes/Deconvolution/STRIDE_pipeline.sh \
+# /home/share/xwanaf/sour_sep/benchmarking/Decon/MERFISH/MOp3_16x58_x1.0/sc_rna.h5ad \
+# /home/share/xwanaf/sour_sep/revision/data/RCTDnv_MOp/260UMI/spatial.h5ad cell_type \
+# /home/share/xiaojs/spatial/benchmarking/MOp_260UMI
\ No newline at end of file
diff --git a/compared_methods/SpaOTsc_pipeline.py b/compared_methods/SpaOTsc_pipeline.py
new file mode 100644
index 0000000..eb8093d
--- /dev/null
+++ b/compared_methods/SpaOTsc_pipeline.py
@@ -0,0 +1,57 @@
+# basic imports
+import scanpy as sc
+import pandas as pd
+import numpy as np
+from scipy.spatial import distance_matrix
+from spaotsc import SpaOTsc
+from scipy import stats
+import tangram as tg
+import sys
+# Example
+# python ~/bio/SpatialBenchmarking/Codes/Deconvolution/SpaOTsc_pipeline.py \
+# /home/share/xiaojs/spatial/sour_sep/mouce_brain_VISp/Ref_scRNA_VISp_qc2.h5ad \
+# /home/share/xiaojs/spatial/sour_sep/tangram/merfish/MERFISH_mop.h5ad \
+# cell_subclass \
+# /home/share/xwanaf/sour_sep/simulation/SpaOTsc_test
+
+
+sc_file_path = sys.argv[1]
+spatial_file_path = sys.argv[2]
+celltype_key = sys.argv[3]
+output_file_path = sys.argv[4]
+
+ad_sc = sc.read(sc_file_path)
+ad_sp = sc.read(spatial_file_path)
+df_sc = ad_sc.to_df()
+df_IS = ad_sp.to_df()
+
+try:
+    pts = ad_sp.obs[['X','Y']].values
+except:
+    pts = ad_sp.obs[['x','y']].values
+is_dmat = distance_matrix(pts, pts)
+
+df_is=df_IS
+
+gene_is=df_is.columns.tolist()
+gene_sc=df_sc.columns.tolist()
+gene_overloap=list(set(gene_is).intersection(gene_sc))
+a=df_is[gene_overloap]
+b=df_sc[gene_overloap]
+
+
+rho, pval = stats.spearmanr(a, b,axis=1)
+rho[np.isnan(rho)]=0
+mcc=rho[-(len(df_sc)):,0:len(df_is)]
+C = np.exp(1-mcc)
+
+issc = SpaOTsc.spatial_sc(sc_data=df_sc, is_data=df_is, is_dmat = is_dmat)
+
+issc.transport_plan(C**2, alpha=0, rho=1.0, epsilon=1.0, cor_matrix=mcc, scaling=False)
+gamma = issc.gamma_mapping
+for j in range(gamma.shape[1]):
+    gamma[:,j] = gamma[:,j]/np.sum(gamma[:,j])
+ad_map = sc.AnnData(gamma,obs = ad_sc.obs, var=ad_sp.obs)
+tg.project_cell_annotations(ad_map, ad_sp, annotation=celltype_key)
+ad_sp.obsm['tangram_ct_pred'].to_csv(output_file_path + '/SpaOTsc_decon.csv')
+
diff --git a/compared_methods/TangramCell_pipeline.py b/compared_methods/TangramCell_pipeline.py
index 2d1173e..e6274cd 100644
--- a/compared_methods/TangramCell_pipeline.py
+++ b/compared_methods/TangramCell_pipeline.py
@@ -82,7 +82,14 @@ def f7(seq):
 cell_locations = ad_sp.uns['cell_locations'].copy()
 
 
-
+if 'x' in ad_sp.obs.columns:
+    ad_sp.obs['X'] = ad_sp.obs['x']
+    ad_sp.obs['Y'] = ad_sp.obs['y']
+    cell_locations['X'] = cell_locations['x']
+    cell_locations['Y'] = cell_locations['y']
+if 'cell_nums' in ad_sp.obs.columns:
+    ad_sp.obs['cell_num'] = ad_sp.obs['cell_nums']
+    cell_locations['cell_num'] = cell_locations['cell_nums']
 
 ad_sp.obsm['spatial'] = ad_sp.obs[['X','Y']].values
 ad_sp_obsm_image_features = pd.DataFrame()
diff --git a/compared_methods/novoSpaRc_pipeline.py b/compared_methods/novoSpaRc_pipeline.py
new file mode 100644
index 0000000..5be6627
--- /dev/null
+++ b/compared_methods/novoSpaRc_pipeline.py
@@ -0,0 +1,68 @@
+# basic imports
+import scanpy as sc
+import pandas as pd
+import numpy as np
+from scipy.spatial import distance_matrix
+import novosparc as nc
+from scipy import stats
+import tangram as tg
+from scipy.spatial.distance import cdist
+import sys
+
+# Example
+# python ~/bio/SpatialBenchmarking/Codes/Deconvolution/novoSpaRc_pipeline.py \
+# /home/share/xiaojs/spatial/sour_sep/mouce_brain_VISp/Ref_scRNA_VISp_qc2.h5ad \
+# /home/share/xiaojs/spatial/sour_sep/tangram/merfish/MERFISH_mop.h5ad \
+# cell_subclass \
+# /home/share/xwanaf/sour_sep/simulation/novoSpaRc_test
+
+
+sc_file_path = sys.argv[1]
+spatial_file_path = sys.argv[2]
+celltype_key = sys.argv[3]
+output_file_path = sys.argv[4]
+
+ad_sc = sc.read(sc_file_path)
+ad_sp = sc.read(spatial_file_path)
+gene_names = np.array(ad_sc.var.index.values)
+dge = ad_sc.to_df().values
+num_cells = dge.shape[0]
+print ('number of cells and genes in the matrix:', dge.shape)
+
+hvg = np.argsort(np.divide(np.var(dge,axis=0),np.mean(dge,axis=0)+0.0001))
+dge_hvg = dge[:,hvg[-2000:]]
+
+try:
+    locations = ad_sp.obs[['X','Y']].values
+except:
+    locations = ad_sp.obs[['x','y']].values
+    
+num_locations = locations.shape[0]
+
+p_location, p_expression = nc.rc.create_space_distributions(num_locations, num_cells)
+cost_expression, cost_locations = nc.rc.setup_for_OT_reconstruction(dge_hvg,locations,num_neighbors_source = 5,num_neighbors_target = 5)
+
+gene_is=ad_sp.var.index.tolist()
+gene_sc=ad_sc.var.index.tolist()
+insitu_genes=list(set(gene_is).intersection(gene_sc))
+
+markers_in_sc = np.array([], dtype='int')
+for marker in insitu_genes:
+    marker_index = np.where(gene_names == marker)[0]
+    if len(marker_index) > 0:
+        markers_in_sc = np.append(markers_in_sc, marker_index[0])
+                
+insitu_matrix = np.array(ad_sp.to_df()[insitu_genes])
+cost_marker_genes = cdist(dge[:, markers_in_sc]/np.amax(dge[:, markers_in_sc]),insitu_matrix/np.amax(insitu_matrix))
+
+alpha_linear = 0.5
+gw = nc.rc._GWadjusted.gromov_wasserstein_adjusted_norm(cost_marker_genes, cost_expression, cost_locations,alpha_linear, p_expression, p_location,'square_loss', epsilon=5e-3, verbose=True)
+
+gamma = gw
+for j in range(gamma.shape[1]):
+    gamma[:,j] = gamma[:,j]/np.sum(gamma[:,j])
+ad_map = sc.AnnData(gamma,obs = ad_sc.obs, var=ad_sp.obs)
+tg.project_cell_annotations(ad_map, ad_sp, annotation=celltype_key)
+
+ad_sp.obsm['tangram_ct_pred'].to_csv(output_file_path + '/novoSpaRc_decon.csv')
+
diff --git a/src/Cell_Type_Identification.py b/src/Cell_Type_Identification.py
index 55e6566..f3d0732 100644
--- a/src/Cell_Type_Identification.py
+++ b/src/Cell_Type_Identification.py
@@ -110,13 +110,13 @@ def WarmStart(self,hs_ST,UMI_min_sigma = 300):
             else:
                 self.loggings.error('Wrong spatial location shape: {}'.format(self.sp_adata.obsm['spatial'].shape))
                 sys.exit()
-
+        UMI_min = min(UMI_min,UMI_min_sigma)
         nUMI = pd.DataFrame(np.array(self.sp_adata.X.sum(-1)), index = self.sp_adata.obs.index)
         puck = SpatialRNA(coords, counts, nUMI)
         counts = self.sc_adata.to_df().T
         cell_types = pd.DataFrame(self.sc_adata.obs[self.cell_class_column])
         nUMI = pd.DataFrame(self.sc_adata.to_df().T.sum(0))
-        reference = Reference(counts, cell_types, nUMI)
+        reference = Reference(counts, cell_types, nUMI, loggings=self.loggings)
         myRCTD = create_RCTD(puck, reference, max_cores = 22, UMI_min=UMI_min, UMI_min_sigma = UMI_min_sigma, loggings = self.loggings)
         myRCTD = run_RCTD(myRCTD, self.Q_mat_all, self.X_vals_loc, loggings = self.loggings)
         self.InitProp = myRCTD
@@ -170,7 +170,7 @@ def CellTypeIdentification(self, nu = 10, n_neighbo = 10, hs_ST = False, VisiumC
             with open(os.path.join(self.out_dir, 'InitProp.pickle'), 'rb') as handle:
                 self.InitProp = pickle.load(handle)
                 self.LoadLikelihoodTable()
-            
+        cell_locations = cell_locations.loc[cell_locations['spot_index'].isin(self.InitProp['results'].index.values)]
         CellTypeLabel = SingleCellTypeIdentification(self.InitProp, cell_locations, 'spot_index', self.Q_mat_all, self.X_vals_loc, nu = nu, n_epoch = 8, n_neighbo = n_neighbo, loggings = self.loggings, hs_ST = hs_ST)
         label2ct = CellTypeLabel['label2ct']
         discrete_label = CellTypeLabel['discrete_label'].copy()
@@ -217,7 +217,7 @@ def CellTypeIdentification(self, nu = 10, n_neighbo = 10, hs_ST = False, VisiumC
         counts = self.sc_adata.to_df().T
         cell_types = pd.DataFrame(self.sc_adata.obs[self.cell_class_column])
         nUMI = pd.DataFrame(self.sc_adata.to_df().T.sum(0))
-        reference = Reference(counts, cell_types, nUMI)
+        reference = Reference(counts, cell_types, nUMI, loggings=self.loggings)
         estimated_be = calculate_batch_effect(puck, reference, add_genes, max_cores = 22, loggings = self.loggings)
             
         sp_data = self.sp_adata_be.copy()
diff --git a/src/Nuclei_Segmentation.py b/src/Nuclei_Segmentation.py
index 7b2ca31..7a4e0c9 100644
--- a/src/Nuclei_Segmentation.py
+++ b/src/Nuclei_Segmentation.py
@@ -16,13 +16,14 @@
 
     
 class SpatialScopeNS:
-    def __init__(self,tissue,out_dir,ST_Data,Img_Data,prob_thresh,max_cell_number):
+    def __init__(self,tissue,out_dir,ST_Data,Img_Data,prob_thresh,max_cell_number,min_counts):
         self.tissue = tissue
         self.out_dir = out_dir 
         self.ST_Data = ST_Data
         self.Img_Data = Img_Data
         self.prob_thresh = prob_thresh
         self.max_cell_number = max_cell_number
+        self.min_counts = min_counts
         if not os.path.exists(out_dir):
             os.mkdir(out_dir)
         if not os.path.exists(os.path.join(out_dir,tissue)):
@@ -31,16 +32,16 @@ def __init__(self,tissue,out_dir,ST_Data,Img_Data,prob_thresh,max_cell_number):
         self.out_dir = os.path.join(out_dir,tissue)
         loggings = configure_logging(os.path.join(self.out_dir,'logs'))
         self.loggings = loggings 
-        self.LoadData(self.ST_Data, self.Img_Data)
+        self.LoadData(self.ST_Data, self.Img_Data, self.min_counts)
 
 
-    def LoadData(self, ST_Data, Img_Data):
+    def LoadData(self, ST_Data, Img_Data, min_counts):
         self.loggings.info(f'Reading spatial data: {ST_Data}')
         sp_adata = anndata.read_h5ad(ST_Data)
         sp_adata.obs_names_make_unique()
         sp_adata.var_names_make_unique()
         self.loggings.info(f'Spatial data shape: {sp_adata.shape}')
-        sc.pp.filter_cells(sp_adata, min_counts=1000)
+        sc.pp.filter_cells(sp_adata, min_counts=min_counts)
         sc.pp.filter_cells(sp_adata, max_counts=20000)
         self.loggings.info(f'Spatial data shape after QC: {sp_adata.shape}')
         self.sp_adata = sp_adata
@@ -119,7 +120,7 @@ def NucleiSegmentation(self):
         
         self.sp_adata.obs["cell_count"] = self.sp_adata.obsm["image_features"]["segmentation_label"].astype(int)
         
-        fig, axes = plt.subplots(1, 3,figsize=(30,9),dpi=100)
+        fig, axes = plt.subplots(1, 3,figsize=(30,9),dpi=250)
         self.image.show("image", ax=axes[0])
         _ = axes[0].set_title("H&E")
         self.image.show("segmented_stardist_default", cmap="jet", interpolation="none", ax=axes[1])
@@ -164,11 +165,12 @@ def NucleiSegmentation(self):
     parser.add_argument('--out_dir', type=str, help='output path', default=None)
     parser.add_argument('--ST_Data', type=str, help='ST data path', default=None)
     parser.add_argument('--Img_Data', type=str, help='H&E stained image data path', default=None)
-    parser.add_argument('--prob_thresh', type=float, help='object probability threshold', default=0.5)
+    parser.add_argument('--prob_thresh', type=float, help='object probability threshol, decrease this parameter if too many nucleus are missing', default=0.5)
     parser.add_argument('--max_cell_number', type=int, help='maximum cell number per spot', default=20)
+    parser.add_argument('--min_counts', type=int, help='minimum UMI count per spot', default=500)
     args = parser.parse_args()
     
-    NS = SpatialScopeNS(args.tissue, args.out_dir, args.ST_Data, args.Img_Data, args.prob_thresh, args.max_cell_number)
+    NS = SpatialScopeNS(args.tissue, args.out_dir, args.ST_Data, args.Img_Data, args.prob_thresh, args.max_cell_number, args.min_counts)
     NS.NucleiSegmentation()