-
Notifications
You must be signed in to change notification settings - Fork 0
/
nnModelTraining.py
109 lines (90 loc) · 3.79 KB
/
nnModelTraining.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
# Code borrowed from http://machinelearningmastery.com/regression-tutorial-keras-deep-learning-library-python/
import numpy as np
import pandas as pd
from keras.models import Sequential
from keras.layers import Activation, Dense
from keras.callbacks import ModelCheckpoint
from keras.wrappers.scikit_learn import KerasRegressor
from keras.optimizers import SGD, RMSprop
from keras.models import load_model
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
import time
from collections import Counter
import csv
'''
CREATES INPUT AND OUTPUT VECTORS, TRAINS A NEURAL NETWORK
'''
# Module-level data load: both CSVs are headerless (header=None), so
# columns are referenced by position. create_neural_network below treats
# column 0 as the regression target and the remaining columns as features.
TRAIN = pd.read_csv('results/training.csv', sep=',', header=None, encoding='utf-8')
TEST = pd.read_csv('results/test.csv', sep=',', header=None, encoding='utf-8')
'''
TRAIN THE NEURAL NETWORK
'''
# Fix the NumPy RNG so weight initialization and shuffling are repeatable.
seed = 7
np.random.seed(seed)
def results_to_csv(filename, results):
    """Write predictions to a two-column CSV with header ``Id,Category``.

    Args:
        filename: path of the CSV file to create (overwritten if present).
        results: iterable of numeric predictions; the row Id is the
            position of the value in the iterable, starting at 0.
    """
    # 'w' + newline='' is the csv-module convention on Python 3; the
    # original opened the file in 'wb', which makes csv.writer raise
    # TypeError there (it writes str, not bytes).
    with open(filename, 'w', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(["Id", "Category"])
        # enumerate replaces the original hand-maintained index counter.
        for index, val in enumerate(results):
            writer.writerow([index, float(val)])
def create_neural_network(training, test, trainNewModel=False, modelFilename='test.h5'):
    """Train a feed-forward regression network and score it on test data.

    Args:
        training: DataFrame whose column 0 is the target value and whose
            remaining columns are the input features.
        test: DataFrame with the same column layout as ``training``.
        trainNewModel: when True, fit the network and save its weights to
            ``model_year_runtime_v2.h5``. NOTE(review): when False, no
            weights are loaded either (the load_weights call was commented
            out upstream), so predictions come from an untrained network —
            confirm this is intended.
        modelFilename: currently unused; kept for interface compatibility.

    Returns:
        (model, results_df): the Keras model and a DataFrame with columns
        ``prediction``, ``label`` and ``diff`` (per-row squared error).
        The original returned None; the added return value is
        backward-compatible for callers that ignore it.
    """
    # Split into input (X) and output (Y): column 0 is the label.
    input_dimension = training.shape[1] - 1
    X = training.iloc[:, 1:].values
    Y = training.iloc[:, 0:1].values
    TEST_X = test.iloc[:, 1:]
    TEST_Y = test.iloc[:, :1]

    def baseline_model():
        # Build a deep MLP: wide hidden layers tapering to a single
        # linear output unit (standard shape for scalar regression).
        # Keras argument names (init=, nb_epoch=) match the old Keras
        # API used elsewhere in this file.
        model = Sequential()
        model.add(Dense(input_dimension, input_dim=input_dimension, init='normal', activation='relu'))
        model.add(Dense(2150, init='normal', activation='relu'))
        model.add(Dense(1000, init='normal', activation='relu'))
        model.add(Dense(500, init='normal', activation='relu'))
        model.add(Dense(20, init='normal', activation='relu'))
        model.add(Dense(1, init='normal'))  # linear output for regression
        model.compile(loss='mean_squared_error', optimizer='adam')
        if trainNewModel:
            model.fit(X, Y, nb_epoch=120, batch_size=10)
            model.save_weights("model_year_runtime_v2.h5")
        #model.load_weights("model_v1.h5")
        predictions = model.predict(np.array(TEST_X))
        predictions = [float(x[0]) for x in predictions]
        # BUG FIX: the original looped `for prediction in predictions`
        # and rebound the loop variable (`prediction = 10.0`), which
        # never modified the list. Clamp properly so no prediction
        # exceeds 10 (the apparent maximum of the target scale —
        # TODO confirm against the label range).
        predictions = [min(p, 10.0) for p in predictions]
        predictions = pd.Series(predictions)
        #results_to_csv('results_1.csv', predictions)
        return model, predictions

    model, predictions = baseline_model()
    # Align predictions with the true labels; both carry a default
    # RangeIndex, so concat matches rows positionally.
    results_df = pd.concat([predictions, TEST_Y], axis=1)
    results_df.columns = ['prediction', 'label']
    # Per-row squared error; its mean is the test-set MSE.
    # (The original wrapped the difference in abs(), which is redundant
    # under squaring.)
    results_df['diff'] = (results_df['prediction'] - results_df['label']) ** 2
    print(results_df)
    print(results_df["diff"].mean())
    return model, results_df
if __name__ == "__main__":
    # Train a fresh model on the CSVs loaded at module import time.
    # The original wrapped this call in `try: ... except: raise`, which
    # is a no-op (a bare re-raise of everything) — call directly.
    create_neural_network(TRAIN, TEST, trainNewModel=True)