Skip to content

Commit

Permalink
Minor changes
Browse files Browse the repository at this point in the history
  • Loading branch information
Jeff1995 committed Apr 2, 2019
1 parent cf2fb2b commit 0bbaa16
Show file tree
Hide file tree
Showing 11 changed files with 24,621 additions and 14 deletions.
2 changes: 1 addition & 1 deletion Cell_BLAST/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,4 +30,4 @@
"config"
]

__version__ = "0.1.1"
__version__ = "0.1.2"
26 changes: 23 additions & 3 deletions Cell_BLAST/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -294,6 +294,16 @@ def __getitem__(self, slices):
uns=copy.deepcopy(dict(self.uns))
)

def clean_duplicate_vars(self):
    """
    Drop variables whose name already appeared earlier in ``var_names``.

    The first occurrence of every name is kept and all later
    occurrences are removed; the relative order of the kept
    variables is unchanged.

    Returns
    -------
    subset
        Result of ``self[:, keep_mask]``, where ``keep_mask`` is a
        boolean array that is ``True`` for the first occurrence of
        each name in ``self.var_names``.
    """
    seen_vars = set()
    # ``True`` means "keep this column"; allocate as boolean directly
    # instead of building a float array and casting it afterwards.
    keep_mask = np.ones(self.var_names.size, dtype=np.bool_)
    for idx, name in enumerate(self.var_names):
        if name in seen_vars:
            keep_mask[idx] = False
        else:
            seen_vars.add(name)
    return self[:, keep_mask]

def get_meta_or_var(self, names, normalize_var=False, log_var=False):
"""
Get either meta information (column names in ``obs``) or
Expand Down Expand Up @@ -796,7 +806,7 @@ def obs_correlation_heatmap(

def violin(
self, group, var, normalize_var=True, width=7, height=7,
ax=None, **kwargs
ax=None, strip_kws=None, violin_kws=None
):
"""
Violin plot across obs groups.
Expand All @@ -816,7 +826,10 @@ def violin(
ax : matplotlib.axes.Axes
Specify an existing axes to plot onto, by default None.
If specified, ``width`` and ``height`` take no effect.
**kwargs
strip_kws : dict
Additional keyword arguments will be passed to
``seaborn.stripplot``.
violin_kws : dict
Additional keyword arguments will be passed to
``seaborn.violinplot``.
Expand All @@ -828,15 +841,22 @@ def violin(
import matplotlib.pyplot as plt
import seaborn as sns

strip_kws = {} if strip_kws is None else strip_kws
violin_kws = {} if violin_kws is None else violin_kws

df = self.get_meta_or_var(
[group, var],
normalize_var=normalize_var, log_var=True
)
if ax is None:
_, ax = plt.subplots(figsize=(width, height))
ax = sns.stripplot(
x=group, y=var, data=df,
color=".3", edgecolor=None, size=3, ax=ax, **strip_kws
)
ax = sns.violinplot(
x=group, y=var, data=df,
scale="width", ax=ax, inner="point", **kwargs
scale="width", ax=ax, inner=None, **violin_kws
)
ax.spines["right"].set_visible(False)
ax.spines["top"].set_visible(False)
Expand Down
8 changes: 4 additions & 4 deletions Datasets/ACA_datasets.csv
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,9 @@ Dahlin_10x,Mus musculus,Bone Marrow,NA,10x,46447,A single-cell hematopoietic lan
Dahlin_mutant,Mus musculus,Bone Marrow,NA,10x,14675,A single-cell hematopoietic landscape resolves 8 lineage trajectories and defects in Kit mutant mice,TRUE,c-Kit mutant,collect_dahlin.R
Quake_10x_Bone_Marrow,Mus musculus,Bone Marrow,,10x,3652,Single-cell transcriptomics of 20 mouse organs creates a Tabula Muris,TRUE,,collect_quake_10x.R
Quake_Smart-seq2_Bone_Marrow,Mus musculus,Bone Marrow,,Smart-seq2,5037,Single-cell transcriptomics of 20 mouse organs creates a Tabula Muris,TRUE,,collect_quake_smartseq2.R
Tusi,Mus musculus,Bone Marrow,adult,inDrop,4763,Population snapshots predict early haematopoietic and erythroid hierarchies,FALSE,Continuous,collect_tusi.R
Velten_QUARTZ-seq,Homo sapiens,Bone Marrow,29-year-old,QUARTZ-seq,379,Human haematopoietic stem cell lineage commitment is a continuous process,FALSE,"no meta, continuous",collect_velten.R
Velten_Smart-seq2,Homo sapiens,Bone Marrow,25-year-old,Smart-seq2,1035,Human haematopoietic stem cell lineage commitment is a continuous process,FALSE,"no meta, continuous",collect_velten.R
Tusi,Mus musculus,Bone Marrow,adult,inDrop,4763,Population snapshots predict early haematopoietic and erythroid hierarchies,TRUE,Continuous,collect_tusi.R
Velten_QUARTZ-seq,Homo sapiens,Bone Marrow,29-year-old,QUARTZ-seq,379,Human haematopoietic stem cell lineage commitment is a continuous process,TRUE,"no meta, continuous",collect_velten.R
Velten_Smart-seq2,Homo sapiens,Bone Marrow,25-year-old,Smart-seq2,1035,Human haematopoietic stem cell lineage commitment is a continuous process,TRUE,"no meta, continuous",collect_velten.R
Campbell,Mus musculus,Brain,,Drop-seq,20921,A molecular census of arcuate hypothalamus and median eminence cell types,TRUE,Adult Arc-ME complex,collect_campbell.R
Chen,Mus musculus,Brain,,Drop-seq,12089,Single-Cell RNA-Seq Reveals Hypothalamic Cell Diversity,TRUE,Adult hypothalamus,collect_chen.R
Lake_2018,Homo sapiens,Brain,,snDrop-seq,35289,Integrative single-cell analysis of transcriptional and epigenetic states in the human adult brain,TRUE,,collect_lake_2018.R
Expand Down Expand Up @@ -85,4 +85,4 @@ Montoro_10x,Mus musculus,Trachea,adult,10x,7193,A revised airway epithelial hier
Montoro_Smart-seq2,Mus musculus,Trachea,adult,modified Smart-seq2,301,A revised airway epithelial hierarchy includes CFTR-expressing ionocytes,TRUE,3 WT,collect_montoro_smartseq2.R
Plasschaert,Mus musculus,Trachea,adult,inDrop,6977,A single-cell atlas of the airway epithelium reveals the CFTR-rich pulmonary ionocyte,TRUE,4 WT,collect_plasschaert.R
Quake_10x_Trachea,Mus musculus,Trachea,,10x,11269,Single-cell transcriptomics of 20 mouse organs creates a Tabula Muris,TRUE,,collect_quake_10x.R
Quake_Smart-seq2_Trachea,Mus musculus,Trachea,,Smart-seq2,1350,Single-cell transcriptomics of 20 mouse organs creates a Tabula Muris,TRUE,,collect_quake_smartseq2.R
Quake_Smart-seq2_Trachea,Mus musculus,Trachea,,Smart-seq2,1350,Single-cell transcriptomics of 20 mouse organs creates a Tabula Muris,TRUE,,collect_quake_smartseq2.R
3 changes: 2 additions & 1 deletion Datasets/aligned_datasets.csv
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,8 @@ ALIGNED_Mus_musculus_Pancreas,Mus musculus,Pancreas,,,,,,"Baron_mouse, Quake_Sma
ALIGNED_Mus_musculus_Retina,Mus musculus,Retina,,,,,,"Macosko, Shekhar"
ALIGNED_Mus_musculus_Small_Intestine,Mus musculus,Small Intestine,,,,,,"Haber_10x, Haber_10x_largecell, Haber_10x_region, Haber_10x_FAE, Haber_Smart-seq2"
ALIGNED_Mus_musculus_Spleen,Mus musculus,Spleen,,,,,,"Quake_10x_Spleen, Quake_Smart-seq2_Spleen"
ALIGNED_Mus_musculus_Heart_and_Aorta,Mus musculus,"Heart, Aorta",,,,,,"Quake_10x_Heart_and_Aorta, Quake_Smart-seq2_Heart"
ALIGNED_Mus_musculus_Thymus,Mus musculus,Thymus,,,,,,"Quake_10x_Thymus, Quake_Smart-seq2_Thymus"
ALIGNED_Mus_musculus_Tongue,Mus musculus,Tongue,,,,,,"Quake_10x_Tongue, Quake_Smart-seq2_Tongue"
ALIGNED_Mus_musculus_Trachea,Mus musculus,Trachea,,,,,,"Quake_10x_Trachea, Quake_Smart-seq2_Trachea"
ALIGNED_Tabula_Muris,Mus musculus,Trachea,,,,,,"Quake_10x, Quake_Smart-seq2"
ALIGNED_Tabula_Muris,Mus musculus,Atlas,,,,,,"Quake_10x, Quake_Smart-seq2"
70 changes: 70 additions & 0 deletions Datasets/collect/collect_ariss.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
#! /usr/bin/env Rscript
# by wangshuai
# 11 Mar 2019
# 14:36:35 PM
#
# Collects the Ariss dataset: reads two cell-label tables and the per-file
# expression tables under ../download/Ariss/GSE115476, merges them into one
# gene-by-cell matrix, and writes the result to ../data/Ariss/data.h5.

suppressPackageStartupMessages({
library(Seurat)
})
# presumably provides `ExprDataSet` and `write_dataset` used below -- confirm
source("../../Utilities/data.R", chdir = TRUE)

#READ label file
cat("Reading label file...\n")
# First label table: index rows by the `CellName` column and keep only the
# `Genotype` and `cell_type1` columns.
metadata1 <- read.table("../download/Ariss/wt_Rbf_and_populations.txt",header=T,stringsAsFactors = F)
row.names(metadata1) <- metadata1[,'CellName']
metadata1 <- metadata1[,c('Genotype','cell_type1')]

# Second label table: its first column becomes the row names, and every
# cell in it is labelled with genotype 'wt'.
metadata2 <- read.table("../download/Ariss/Cells_and_population.txt",header=T,row.names=1,stringsAsFactors = F)
metadata2$Genotype <- 'wt'

# Stack both tables (metadata2 rows first).
# NOTE(review): rbind needs both frames to share the same column names, so
# metadata2 presumably carries a `cell_type1` column -- confirm.
# NOTE(review): `includedcells` is the union of both row-name sets, so the
# filter below can only drop rows whose names were altered by rbind --
# presumably cells present in both tables; verify this is the intent.
includedcells<-union(row.names(metadata1),row.names(metadata2))
metadata <- rbind(metadata2,metadata1)
metadata <- metadata[which(row.names(metadata) %in% includedcells),]

# NOTE(review): `celltypes` is not referenced again anywhere in this script.
celltypes <- read.csv('../download/celltypes',sep='\t')

# Constant annotations applied to every cell.
metadata$lifestage <- 'third instar larva stage'
metadata$organ <- 'eye disc'
metadata$race <- 'Drosophila melanogaster'

#READ DGE
cat("Reading DGE\n")
# Read every file found in the GSE115476 directory as a table; the resulting
# list `data` is named by file name.
path <- "../download/Ariss/GSE115476"
fileNames <- dir(path)
filePath <- sapply(fileNames, function(x){
paste(path,x,sep='/')})
data <- lapply(filePath, function(x){
read.table(x, header=T,stringsAsFactors = F)})

# For each table: derive a prefix from its file name, prepend it to all
# column names, then keep the first (gene) column plus the columns whose
# prefixed names match a row name in `metadata`.
i <- 1
for (name in names(data)){
# Prefix = text between a '_' and a '.' in the file name.
# NOTE(review): gregexpr returns every match position; this presumably
# relies on file names with a single '_' and a single '.' -- confirm.
perfix<-substr(name,gregexpr(pattern = '_',text = name)[[1]]+1,gregexpr(pattern = "\\.",text = name)[[1]]-1)
# Every column is renamed, including the first (gene) column.
colnames(data[[i]]) <- lapply(colnames(data[[i]]),function(x){
paste(perfix,x,sep='_')})
genes <- data[[i]][,1]
included_cells <- intersect(rownames(metadata), colnames(data[[i]]))
# Gene column is placed first so the merge below can join on column 1.
data[[i]] <- data.frame(genes,data[[i]][, included_cells])
i <- i+1
}

# Full outer join of all tables on the gene column (all = T keeps genes that
# are missing from some tables; those entries become NA).
expmerge <- Reduce(function(x,y) merge(x,y,by=1,all=T),data)
row.names(expmerge)<-expmerge[,1]
expmerge<-expmerge[,-1]
# Restrict metadata and expression to the same cells, in the same order.
included_cells <- intersect(rownames(metadata), colnames(expmerge))
metadata <- metadata[included_cells, ]
expmerge <- expmerge[, included_cells]
# Entries left NA by the outer join are set to zero.
expmerge[is.na(expmerge)]<-0

# Rows (genes) whose value exceeds 1 in more than 5 columns (cells).
expressed_genes <- rownames(expmerge)[rowSums(expmerge > 1) > 5]
expmerge <- Matrix(as.matrix(expmerge),sparse = T)

message("Constructing dataset...")
# All genes are kept in the matrix; `expressed_genes` is stored alongside
# in `uns` and is not used here to subset `exprs`.
dataset <- new("ExprDataSet",
exprs = expmerge, obs = metadata,
var = data.frame(row.names = rownames(expmerge)),
uns = list(expressed_genes = expressed_genes)
)

message("Saving data...")
write_dataset(dataset, "../data/Ariss/data.h5")
cat("Done!\n")
7 changes: 6 additions & 1 deletion Datasets/collect/collect_tusi.R
Original file line number Diff line number Diff line change
Expand Up @@ -27,10 +27,15 @@ colnames(potential) <- "potential"
meta_df <- Reduce(cbind, list(meta_df, fate, potential))
rownames(meta_df) <- meta_df$cell_id
meta_df$cell_id <- NULL
meta_df$cell_type1 = "HSPC"

expr_mat <- expr_mat[rownames(meta_df), ]

#assign cell ontology
cell_ontology <- read.csv("../cell_ontology/bone_marrow_cell_ontology.csv")
cell_ontology <- cell_ontology[, c("cell_type1", "cell_ontology_class", "cell_ontology_id")]

#datasets_meta
datasets_meta <- read.csv("../ACA_datasets.csv", header = TRUE, row.names = 1)
construct_dataset("../data/Tusi", t(expr_mat), meta_df, datasets_meta, grouping = "batch")
construct_dataset("../data/Tusi", t(expr_mat), meta_df, datasets_meta, cell_ontology, grouping = "batch")
message("Done!")
Loading

0 comments on commit 0bbaa16

Please sign in to comment.