diff --git a/utils.py b/utils.py
deleted file mode 100644
index 54acd81..0000000
--- a/utils.py
+++ /dev/null
@@ -1,414 +0,0 @@
-import os
-import numpy as np
-import scanpy as sc
-import pandas as pd
-import anndata
-import umap
-from portal.model import *
-from sklearn.decomposition import PCA  # used below; previously reached only via the wildcard import
-import rpy2.robjects as ro
-import rpy2.robjects.numpy2ri
-import anndata2ri
-from sklearn.neighbors import NearestNeighbors
-
-def preprocess_datasets(adata_list,           # list of anndata to be integrated
-                        hvg_num=4000,         # number of highly variable genes for each anndata
-                        save_embedding=False, # save low-dimensional embeddings or not
-                        data_path="data"
-                        ):
-
-    if len(adata_list) < 2:
-        raise ValueError("There should be at least two datasets for integration!")
-
-    sample_size_list = []
-
-    print("Finding highly variable genes...")
-    for i, adata in enumerate(adata_list):
-        sample_size_list.append(adata.shape[0])
-        # adata = adata_input.copy()
-        sc.pp.highly_variable_genes(adata, flavor='seurat_v3', n_top_genes=hvg_num)
-        hvg = adata.var[adata.var.highly_variable == True].sort_values(by="highly_variable_rank").index
-        if i == 0:
-            hvg_total = hvg
-        else:
-            hvg_total = hvg_total & hvg
-    if len(hvg_total) < 100:
-        raise ValueError("The total number of highly variable genes is smaller than 100 (%d). Try to set a larger hvg_num." % len(hvg_total))
-
-    print("Normalizing and scaling...")
-    for i, adata in enumerate(adata_list):
-        # adata = adata_input.copy()
-        sc.pp.normalize_total(adata, target_sum=1e4)
-        sc.pp.log1p(adata)
-        adata = adata[:, hvg_total]
-        sc.pp.scale(adata, max_value=10)
-        if i == 0:
-            adata_total = adata
-        else:
-            adata_total = adata_total.concatenate(adata, index_unique=None)
-
-    print("Dimensionality reduction via PCA...")
-    npcs = 30
-    pca = PCA(n_components=npcs, svd_solver="arpack", random_state=0)
-    adata_total.obsm["X_pca"] = pca.fit_transform(adata_total.X)
-
-    indices = np.cumsum(sample_size_list)
-
-    data_path = os.path.join(data_path, "preprocess")
-    if not os.path.exists(data_path):
-        os.makedirs(data_path)
-
-    if save_embedding:
-        for i in range(len(indices)):
-            if i == 0:
-                np.save(os.path.join(data_path, "lowdim_1.npy"),
-                        adata_total.obsm["X_pca"][:indices[0], :npcs])
-            else:
-                np.save(os.path.join(data_path, "lowdim_%d.npy" % (i + 1)),
-                        adata_total.obsm["X_pca"][indices[i-1]:indices[i], :npcs])
-
-    lowdim = adata_total.obsm["X_pca"].copy()
-    lowdim_list = [lowdim[:indices[0], :npcs] if i == 0 else lowdim[indices[i - 1]:indices[i], :npcs]
-                   for i in range(len(indices))]
-
-    return lowdim_list
-
-
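For context, a minimal sketch of how this deleted preprocessing helper was typically invoked (the .h5ad file names are hypothetical):

    import scanpy as sc
    from utils import preprocess_datasets

    adata_list = [sc.read_h5ad("batch_A.h5ad"), sc.read_h5ad("batch_B.h5ad")]
    lowdim_list = preprocess_datasets(adata_list, hvg_num=4000, save_embedding=False)
    # lowdim_list[i]: 30-dimensional PCA embedding of dataset i, computed
    # on the genes that are highly variable in every input dataset.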
-def integrate_datasets(lowdim_list,         # list of low-dimensional representations
-                       search_cos=False,    # search for an optimal lambda_cos or not
-                       lambda_cos=20.0,
-                       training_steps=2000,
-                       space=None,          # None or "reference" or "latent"
-                       data_path="data"
-                       ):
-
-    if space is None:
-        if len(lowdim_list) == 2:
-            space = "latent"
-        else:
-            space = "reference"
-
-    print("Incrementally integrating %d datasets..." % len(lowdim_list))
-
-    if not search_cos:
-        # use the given lambda_cos without a hyperparameter search
-        for i in range(len(lowdim_list) - 1):
-            print("Integrating the %d-th dataset to the 1-st dataset..." % (i + 2))
-            model = Model(lambdacos=lambda_cos,
-                          training_steps=training_steps,
-                          data_path=os.path.join(data_path, "preprocess"),
-                          model_path="models/%d_datasets" % (i + 2),
-                          result_path="results/%d_datasets" % (i + 2))
-            if i == 0:
-                model.emb_A = lowdim_list[0]
-            else:
-                model.emb_A = emb_total
-            model.emb_B = lowdim_list[i + 1]
-            model.train()
-            model.eval()
-            emb_total = model.data_Aspace
-        if space == "reference":
-            return emb_total
-        elif space == "latent":
-            return model.latent
-        else:
-            raise ValueError("Space should be either 'reference' or 'latent'.")
-    else:
-        for i in range(len(lowdim_list) - 1):
-            print("Integrating the %d-th dataset to the 1-st dataset..." % (i + 2))
-            for lambda_cos in [15.0, 20.0, 25.0, 30.0, 35.0, 40.0, 45.0, 50.0]:
-                model = Model(lambdacos=lambda_cos,
-                              training_steps=training_steps,
-                              data_path=os.path.join(data_path, "preprocess"),
-                              model_path="models/%d_datasets" % (i + 2),
-                              result_path="results/%d_datasets" % (i + 2))
-                if i == 0:
-                    model.emb_A = lowdim_list[0]
-                else:
-                    model.emb_A = emb_total
-                model.emb_B = lowdim_list[i + 1]
-                model.train()
-                model.eval()
-                meta = pd.DataFrame(index=np.arange(model.emb_A.shape[0] + model.emb_B.shape[0]))
-                meta["method"] = ["A"] * model.emb_A.shape[0] + ["B"] * model.emb_B.shape[0]
-                mixing = calculate_mixing_metric(model.latent, meta, k=5, max_k=300, methods=list(set(meta.method)))
-                print("lambda_cos: %f, mixing metric: %f \n" % (lambda_cos, mixing))
-                if lambda_cos == 15.0:
-                    model_opt = model
-                    mixing_metric_opt = mixing
-                elif mixing < mixing_metric_opt:
-                    model_opt = model
-                    mixing_metric_opt = mixing
-            emb_total = model_opt.data_Aspace
-        if space == "reference":
-            return emb_total
-        elif space == "latent":
-            return model_opt.latent
-        else:
-            raise ValueError("Space should be either 'reference' or 'latent'.")
-
-
-def calculate_mixing_metric(data, meta, methods, k=5, max_k=300, subsample=True):
-    if subsample:
-        if data.shape[0] >= 1e5:
-            np.random.seed(1234)
-            subsample_idx = np.random.choice(data.shape[0], 50000, replace=False)
-            data = data[subsample_idx]
-            meta = meta.iloc[subsample_idx]
-            meta.index = np.arange(len(subsample_idx))
-    lowdim = data
-
-    nbrs = NearestNeighbors(n_neighbors=max_k, algorithm='kd_tree').fit(lowdim)
-    _, indices = nbrs.kneighbors(lowdim)
-    indices = indices[:, 1:]
-    mixing = np.zeros((data.shape[0], 2))
-    for i in range(data.shape[0]):
-        if len(np.where(meta.method[indices[i, :]] == methods[0])[0]) > k - 1:
-            mixing[i, 0] = np.where(meta.method[indices[i, :]] == methods[0])[0][k - 1]
-        else:
-            mixing[i, 0] = max_k - 1
-        if len(np.where(meta.method[indices[i, :]] == methods[1])[0]) > k - 1:
-            mixing[i, 1] = np.where(meta.method[indices[i, :]] == methods[1])[0][k - 1]
-        else:
-            mixing[i, 1] = max_k - 1
-    return np.mean(np.median(mixing, axis=1) + 1)
-
-
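A quick sanity check of calculate_mixing_metric on synthetic data (a sketch; the two Gaussian batches are invented for illustration, and lower values mean better mixing):

    import numpy as np
    import pandas as pd
    from utils import calculate_mixing_metric

    rng = np.random.default_rng(0)
    mixed = rng.normal(size=(2000, 30))
    meta = pd.DataFrame({"method": ["A"] * 1000 + ["B"] * 1000})
    print(calculate_mixing_metric(mixed, meta, methods=["A", "B"]))      # small: batches interleave
    separated = np.vstack([mixed[:1000] + 10.0, mixed[1000:]])
    print(calculate_mixing_metric(separated, meta, methods=["A", "B"]))  # much larger: batches separate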
-def calculate_ARI(data, meta, anno_A="drop_subcluster", anno_B="subcluster"):
-    # np.random.seed(1234)
-    if data.shape[0] > 1e5:
-        np.random.seed(1234)
-        subsample_idx = np.random.choice(data.shape[0], 50000, replace=False)
-        data = data[subsample_idx]
-        meta = meta.iloc[subsample_idx]
-    lowdim = data
-
-    cellid = meta.index.astype(str)
-    method = meta["method"].astype(str)
-    cluster_A = meta[anno_A].astype(str)
-    if anno_B != anno_A:
-        cluster_B = meta[anno_B].astype(str)
-
-    rpy2.robjects.numpy2ri.activate()
-    nr, nc = lowdim.shape
-    lowdim = ro.r.matrix(lowdim, nrow=nr, ncol=nc)
-    ro.r.assign("data", lowdim)
-    rpy2.robjects.numpy2ri.deactivate()
-
-    cellid = ro.StrVector(cellid)
-    ro.r.assign("cellid", cellid)
-    method = ro.StrVector(method)
-    ro.r.assign("method", method)
-    cluster_A = ro.StrVector(cluster_A)
-    ro.r.assign("cluster_A", cluster_A)
-    if anno_B != anno_A:
-        cluster_B = ro.StrVector(cluster_B)
-        ro.r.assign("cluster_B", cluster_B)
-
-    ro.r("set.seed(1234)")
-    ro.r['library']("Seurat")
-    ro.r['library']("mclust")
-
-    ro.r("comb_normalized <- t(data)")
-    ro.r('''rownames(comb_normalized) <- paste("gene", 1:nrow(comb_normalized), sep = "")''')
-    ro.r("colnames(comb_normalized) <- as.vector(cellid)")
-
-    ro.r("comb_raw <- matrix(0, nrow = nrow(comb_normalized), ncol = ncol(comb_normalized))")
-    ro.r("rownames(comb_raw) <- rownames(comb_normalized)")
-    ro.r("colnames(comb_raw) <- colnames(comb_normalized)")
-
-    ro.r("comb <- CreateSeuratObject(comb_raw)")
-    ro.r('''scunitdata <- Seurat::CreateDimReducObject(
-        embeddings = t(comb_normalized),
-        stdev = as.numeric(apply(comb_normalized, 2, stats::sd)),
-        assay = "RNA",
-        key = "scunit")''')
-    ro.r('''comb[["scunit"]] <- scunitdata''')
-
-    ro.r("comb@meta.data$method <- method")
-    ro.r("comb@meta.data$cluster_A <- cluster_A")
-    if anno_B != anno_A:
-        ro.r("comb@meta.data$cluster_B <- cluster_B")
-
-    ro.r('''comb <- FindNeighbors(comb, reduction = "scunit", dims = 1:ncol(data), force.recalc = TRUE, verbose = FALSE)''')
-    ro.r('''comb <- FindClusters(comb, verbose = FALSE)''')
-
-    if anno_B != anno_A:
-        method_set = pd.unique(meta["method"])
-        method_A = method_set[0]
-        ro.r.assign("method_A", method_A)
-        method_B = method_set[1]
-        ro.r.assign("method_B", method_B)
-        ro.r('''indx_A <- which(comb$method == method_A)''')
-        ro.r('''indx_B <- which(comb$method == method_B)''')
-
-        ro.r("ARI_A <- adjustedRandIndex(comb$cluster_A[indx_A], comb$seurat_clusters[indx_A])")
-        ro.r("ARI_B <- adjustedRandIndex(comb$cluster_B[indx_B], comb$seurat_clusters[indx_B])")
-        ARI_A = np.array(ro.r("ARI_A"))[0]
-        ARI_B = np.array(ro.r("ARI_B"))[0]
-
-        return ARI_A, ARI_B
-    else:
-        ro.r("ARI_A <- adjustedRandIndex(comb$cluster_A, comb$seurat_clusters)")
-        ARI_A = np.array(ro.r("ARI_A"))[0]
-
-        return ARI_A
-
-
-def calculate_kBET(data, meta):
-    cellid = meta.index.astype(str)
-    method = meta["method"].astype(str)
-
-    rpy2.robjects.numpy2ri.activate()
-    nr, nc = data.shape
-    data = ro.r.matrix(data, nrow=nr, ncol=nc)
-    ro.r.assign("data", data)
-    rpy2.robjects.numpy2ri.deactivate()
-
-    cellid = ro.StrVector(cellid)
-    ro.r.assign("cellid", cellid)
-    method = ro.StrVector(method)
-    ro.r.assign("method", method)
-
-    ro.r("set.seed(1234)")
-    ro.r['library']("kBET")
-
-    accept_rate = []
-    for _ in range(100):
-        ro.r("subset_id <- sample.int(n = length(method), size = 1000, replace = FALSE)")
-        ro.r("batch.estimate <- kBET(data[subset_id,], method[subset_id], do.pca = FALSE, plot = FALSE)")
-        accept_rate.append(np.array(ro.r("mean(batch.estimate$results$kBET.pvalue.test > 0.05)")))
-
-    return np.median(accept_rate)
-
-
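calculate_ARI reaches the adjusted Rand index through an R round trip (Seurat graph clustering plus mclust::adjustedRandIndex). Where Seurat's clustering is not required, a pure-Python stand-in can be sketched with scikit-learn; note that this swaps Seurat's graph clustering for KMeans, so the scores are comparable in spirit but not identical:

    import numpy as np
    from sklearn.cluster import KMeans
    from sklearn.metrics import adjusted_rand_score

    def calculate_ari_sklearn(latent, labels, n_clusters=None, seed=1234):
        # Cluster the integrated embedding, then score its agreement
        # with the given annotation via the adjusted Rand index.
        if n_clusters is None:
            n_clusters = len(np.unique(labels))
        pred = KMeans(n_clusters=n_clusters, random_state=seed, n_init=10).fit_predict(latent)
        return adjusted_rand_score(labels, pred)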
-def calculate_ASW(data, meta, anno_A="drop_subcluster", anno_B="subcluster"):
-    if data.shape[0] >= 1e5:
-        np.random.seed(1234)
-        subsample_idx = np.random.choice(data.shape[0], 50000, replace=False)
-        data = data[subsample_idx]
-        meta = meta.iloc[subsample_idx]
-    lowdim = data
-
-    cellid = meta.index.astype(str)
-    method = meta["method"].astype(str)
-    cluster_A = meta[anno_A].astype(str)
-    if anno_B != anno_A:
-        cluster_B = meta[anno_B].astype(str)
-
-    rpy2.robjects.numpy2ri.activate()
-    nr, nc = lowdim.shape
-    lowdim = ro.r.matrix(lowdim, nrow=nr, ncol=nc)
-    ro.r.assign("data", lowdim)
-    rpy2.robjects.numpy2ri.deactivate()
-
-    cellid = ro.StrVector(cellid)
-    ro.r.assign("cellid", cellid)
-    method = ro.StrVector(method)
-    ro.r.assign("method", method)
-    cluster_A = ro.StrVector(cluster_A)
-    ro.r.assign("cluster_A", cluster_A)
-    if anno_B != anno_A:
-        cluster_B = ro.StrVector(cluster_B)
-        ro.r.assign("cluster_B", cluster_B)
-
-    ro.r("set.seed(1234)")
-    ro.r['library']("cluster")
-
-    if anno_B != anno_A:
-        method_set = pd.unique(meta["method"])
-        method_A = method_set[0]
-        ro.r.assign("method_A", method_A)
-        method_B = method_set[1]
-        ro.r.assign("method_B", method_B)
-        ro.r('''indx_A <- which(method == method_A)''')
-        ro.r('''indx_B <- which(method == method_B)''')
-        ro.r('''ASW_A <- summary(silhouette(as.numeric(as.factor(cluster_A[indx_A])), dist(data[indx_A, 1:20])))[["avg.width"]]''')
-        ro.r('''ASW_B <- summary(silhouette(as.numeric(as.factor(cluster_B[indx_B])), dist(data[indx_B, 1:20])))[["avg.width"]]''')
-        ASW_A = np.array(ro.r("ASW_A"))[0]
-        ASW_B = np.array(ro.r("ASW_B"))[0]
-
-        return ASW_A, ASW_B
-    else:
-        ro.r('''ASW_A <- summary(silhouette(as.numeric(as.factor(cluster_A)), dist(data[, 1:20])))[["avg.width"]]''')
-        ASW_A = np.array(ro.r("ASW_A"))[0]
-
-        return ASW_A
-
-
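The R detour in calculate_ASW only computes an average silhouette width on the first 20 dimensions, which scikit-learn covers directly without rpy2 (a sketch, equivalent up to the distance computation):

    from sklearn.metrics import silhouette_score

    def calculate_asw_sklearn(latent, labels, n_dims=20):
        # Mirrors summary(silhouette(..., dist(data[, 1:20])))[["avg.width"]]
        return silhouette_score(latent[:, :n_dims], labels, metric="euclidean")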
space == "Bspace": - method_set = pd.unique(meta["method"]) - adata.obs["score"] = score - adata.obs["margin"] = (score < -5.0) * 1 - fig = sc.pl.umap(adata[meta["method"]==method_set[0]], color="score", palette=palette, groups=groups, return_fig=True, size=size) - fig.savefig(os.path.join(result_path, "%s-score.pdf" % space), bbox_inches='tight', dpi=300) - fig = sc.pl.umap(adata[meta["method"]==method_set[0]], color="margin", palette=palette, groups=groups, return_fig=True, size=size) - fig.savefig(os.path.join(result_path, "%s-margin.pdf" % space), bbox_inches='tight', dpi=300) -