-
Notifications
You must be signed in to change notification settings - Fork 1
/
database_preprocess.py
51 lines (37 loc) · 1.64 KB
/
database_preprocess.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
import numpy as np
from collections import defaultdict
import pandas as pd
################# for running Niches ###############################
# Parallel accumulators: position k of every list describes the same
# ligand-receptor pair (ligand, receptor, annotation, reference).
ligand_list, receptor_list = [], []
annotation_list, reference_list = [], []

# --- Source 1: CellChat ligand-receptor table ---
cell_chat_file = '/cluster/home/t116508uhn/Human-2020-Jin-LR-pairs_cellchat.csv'
df = pd.read_csv(cell_chat_file)
cellchat_columns = zip(
    df["ligand_symbol"], df["annotation"], df["receptor_symbol"], df["evidence"]
)
for ligand, annotation, receptor_field, evidence in cellchat_columns:
    # Only cell-to-cell communication is of interest, so rows annotated
    # as 'ECM-Receptor' are dropped.
    if annotation == 'ECM-Receptor':
        continue
    # A receptor_symbol entry may hold several symbols joined by '&';
    # emit one (ligand, receptor) record per symbol, all sharing the row's
    # annotation and evidence.
    for receptor in receptor_field.split("&"):
        ligand_list.append(ligand)
        receptor_list.append(receptor)
        annotation_list.append(annotation)
        reference_list.append(evidence)
# --- Source 2: NicheNet ligand-receptor table ---
# Every row contributes exactly one pair: 'from' is the ligand, 'to' is the
# receptor and 'source' is recorded as the reference. This table supplies no
# annotation, so a single-space placeholder is stored for each row.
nichetalk_file = '/cluster/home/t116508uhn/NicheNet-LR-pairs.csv'
df = pd.read_csv(nichetalk_file)
ligand_list.extend(df["from"])
receptor_list.extend(df["to"])
annotation_list.extend([' '] * len(df))
reference_list.extend(df["source"])
# Write the merged table as a CSV with four columns:
# Ligand, Receptor, Annotation, Reference.
# The column titles are stored as the first data row, which is why both the
# pandas-generated header and the index are suppressed when writing.
csv_record = [['Ligand', 'Receptor', 'Annotation', 'Reference']]
csv_record.extend(
    [ligand, receptor, annotation, reference]
    for ligand, receptor, annotation, reference in zip(
        ligand_list, receptor_list, annotation_list, reference_list
    )
)
df = pd.DataFrame(csv_record)  # output 4
df.to_csv('/cluster/home/t116508uhn/64630/NEST_database.csv', index=False, header=False)