-
Notifications
You must be signed in to change notification settings - Fork 0
/
Missing_Data_Imputation.py
67 lines (54 loc) · 1.42 KB
/
Missing_Data_Imputation.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
import numpy as np
import pandas
import csv
import sys
import re
import time
from sklearn.preprocessing import Imputer
from itertools import islice
# Path of the CSV file whose per-column means are reported.  The name
# `dataframe` is historical: a commented-out draft of this script loaded the
# file with pandas.read_csv(..., header=None) instead of the csv module.
dataframe = 'ctrl2.csv'

# Cell values that count as "missing" and are left out of a column's mean.
MISSING_MARKERS = ('NA', '')

# NOTE(review): a disabled draft of this script fed the values to
# sklearn's Imputer(missing_values='NA', strategy='mean', axis=0).  Only the
# mean computation below is live; nothing is written back to the file.


def _open_csv(path):
    """Open *path* the way the csv module expects on this interpreter.

    Python 2's csv module wants a binary file object; Python 3's wants a
    text file opened with ``newline=''``.
    """
    if sys.version_info[0] < 3:
        return open(path, 'rb')
    return open(path, 'r', newline='')


def column_means(path):
    """Return ``[(column_index, mean), ...]`` for the inner columns of *path*.

    The first line of the file is treated as a header and is used only to
    count the columns.  Column 0 and the last column are skipped, exactly as
    the original ``range(1, columns - 1)`` did (presumably an identifier and
    a label column -- confirm against the data file).

    Cells equal to ``'NA'`` or the empty string are ignored, as are cells
    missing because a row is shorter than the header.  A column with no
    usable value gets a mean of ``0.0`` (the value the script initialised
    ``mean`` to) instead of raising ``ZeroDivisionError``.  An empty file
    yields an empty list.

    Raises:
        ValueError: if a non-missing cell cannot be parsed as a float.
        IOError / OSError: if *path* cannot be opened.
    """
    with _open_csv(path) as handle:
        reader = csv.reader(handle, delimiter=',', quotechar='|')
        header = next(reader, None)
        if header is None:
            return []

        targets = list(range(1, len(header) - 1))
        totals = dict.fromkeys(targets, 0.0)
        counts = dict.fromkeys(targets, 0)

        # A csv reader can be consumed only once, so every column is
        # accumulated during a single pass over the rows.  The header has
        # already been consumed above, so no further row is skipped here.
        for row in reader:
            for col in targets:
                if col >= len(row):
                    # Short row: this and all later target columns are absent.
                    break
                cell = row[col]
                if cell in MISSING_MARKERS:
                    continue
                totals[col] += float(cell)
                counts[col] += 1

    return [
        (col, totals[col] / counts[col] if counts[col] else 0.0)
        for col in targets
    ]


def main(path=dataframe):
    """Print each inner column's index and mean, then ``Done``."""
    for col, mean in column_means(path):
        print(col)
        # A single pre-formatted string keeps the output identical on
        # Python 2 and 3: the mean, a space, and a blank line after it.
        print("%s \n" % mean)
    print("Done")


if __name__ == "__main__":
    main()