forked from ConnerFlansburg/WeatherData
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.py
133 lines (99 loc) · 4.76 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
"""
main.py creates, trains, and tests machine learning models when provided with test & training data sets.
Authors/Contributors: Dr. Dimitrios Diochnos, Conner Flansburg
Github Repo:
"""
import pathlib as path
import typing as typ
import numpy as np
import pandas as pd
from sklearn.svm import SVC
import sys
import logging as log
import traceback
from sklearn.metrics import accuracy_score
from pyfiglet import Figlet
# Seed handed to the model's random_state.
SEED: int = 498

# Prefix placed in front of every progress line.
HDR = '******'

# ANSI escape sequences shared by the console messages below.
_GREEN_BOLD = '\033[32;1m'
_RESET = '\033[0m'

# Check mark + newline, then reset the text color.
SUCCESS = ' \u2713\n' + _RESET
# Carriage return so the pending progress text is overwritten, then green header.
OVERWRITE = '\r' + _GREEN_BOLD + HDR
# Same green header for lines that do not overwrite previous text.
NO_OVERWRITE = _GREEN_BOLD + HDR

# Stream that all progress output is written to.
SYSOUT = sys.stdout
def main(training_filename: str, test_filename: str) -> None:
    """
    main prints the start up banner and then hands both file names to
    train_and_test, which builds and evaluates the model(s).
    :param training_filename: The training data file, passed on to train_and_test.
    :type training_filename: str
    :param test_filename: The test data file, passed on to train_and_test.
    :type test_filename: str
    :return: main does not return, but rather prints to the console.
    :rtype: None
    """
    # * Print the Start Up Message * #
    renderer = Figlet(font='larry3d')
    banner: str = renderer.renderText('Weather Data')
    SYSOUT.write(f'\033[34;1m{banner}\033[00m')  # colored title, then reset the color
    SYSOUT.write("\033[32;1mProgram Initialized Successfully\033[00m\n")

    # * Train & Test the Model(s) * #
    train_and_test(training_filename, test_filename)
def train_and_test(training_filename: str, test_filename: str) -> None:
    """
    train_and_test reads the training & test CSV files, fits an SVC model on the
    training data, scores its predictions against the test data, and prints the
    accuracy to the console as a color coded percentage.
    Both files are parsed as comma delimited with the first line skipped, and
    formatForSciKit is used to split each one into features & labels.
    :param training_filename: The CSV file used to fit the model.
    :type training_filename: str
    :param test_filename: The CSV file used to score the model.
    :type test_filename: str
    :return: train_and_test does not return, but rather prints to the console.
    :rtype: None
    """
    # * Read the Two CSV Files into Numpy Arrays * #
    SYSOUT.write(HDR + 'Reading in CSVs...')
    SYSOUT.flush()  # no newline was written, so flush to make the progress text show up now
    training: np.ndarray = np.genfromtxt(training_filename, delimiter=',', skip_header=1)
    testing: np.ndarray = np.genfromtxt(test_filename, delimiter=',', skip_header=1)
    SYSOUT.write(OVERWRITE + ' CSVs Parsed '.ljust(50, '-') + SUCCESS)

    # * Get the Labels & Features from the Training Data * #
    train_ftrs, train_labels = formatForSciKit(training)

    # * Create the SVC Model * #
    SYSOUT.write(HDR + 'Creating SVC Model...')
    SYSOUT.flush()
    model: SVC = SVC(kernel='sigmoid', random_state=SEED)
    model.fit(train_ftrs, train_labels)  # train the model
    SYSOUT.write(OVERWRITE + ' SVC Model Created '.ljust(50, '-') + SUCCESS)

    # * Test the Model * #
    SYSOUT.write(HDR + 'Testing SVC Model...')
    SYSOUT.flush()
    test_ftrs, test_labels = formatForSciKit(testing)
    predictions = model.predict(test_ftrs)  # predicted label for every test row
    score = accuracy_score(test_labels, predictions)  # compare predictions to the true labels
    m_type: str = 'SVC'
    SYSOUT.write(OVERWRITE + ' SVC Model Tested '.ljust(50, '-') + SUCCESS)

    # * Report Result * #
    percent_score: float = round(score * 100, 1)  # turn the score into a percent with 1 decimal place
    # The three bands cover every value: a score of exactly 75 or 45 lands in the
    # yellow band instead of slipping between strict comparisons and losing its color.
    if percent_score > 75:  # above 75 print in green
        color = '\033[32;1m'
    elif percent_score >= 45:  # from 45 up to & including 75 print in yellow
        color = '\033[33;1m'
    else:  # below 45 print in red
        color = '\033[91;1m'
    SYSOUT.write(f'\r{color}{m_type} Accuracy is: {percent_score}%\033[00m\n')
    SYSOUT.flush()
def formatForSciKit(data: np.ndarray) -> typ.Tuple[np.ndarray, np.ndarray]:
    """
    formatForSciKit takes the input data and converts it into a form that can
    be understood by the sklearn package. It does this by separating the features
    from their labels and returning them as two different numpy arrays.
    The first column of data is taken as the labels and every other column as
    the features. If data cannot be sliced by row & column (for example it is
    not 2-dimensional) the error is logged & printed, and the program exits
    with status -1.
    :param data: The input data, from a read in CSV.
    :type data: np.ndarray
    :return: The features followed by the labels, as (ftrs, labels).
    :rtype: tuple[np.ndarray, np.ndarray]
    """
    try:
        # create the label array Y (the target of our training)
        # + data[:, :1] get every row but only the first column, then flatten & copy it
        labels = np.array(np.ravel(data[:, :1]))

        # create the feature matrix X
        # + data[:, 1:] get every row but drop the first column
        ftrs = np.array(data[:, 1:])
    except (TypeError, IndexError) as err:
        lineNm = sys.exc_info()[-1].tb_lineno  # get the line number of error
        # data may not be an ndarray at all here (that is one way to end up in this
        # handler), so read ndim defensively rather than raising a second error
        dims = getattr(data, 'ndim', 'unknown')
        msg = f'{str(err)}, line {lineNm}:\ndata = {data}\ndimensions = {dims}'
        log.error(msg)  # log the error
        printError(msg)  # print message
        traceback.print_stack()  # print the call stack that led here
        sys.exit(-1)  # exit on error; recovery not possible

    return ftrs, labels
def printError(message: str) -> None:
    """
    printError writes the given message to the console wrapped in the
    escape codes used for red error text, then resets the text color.
    :param message: The text of the error to display.
    :type message: str
    :return: printError does not return, but rather prints to the console.
    :rtype: None
    """
    red, reset = "\033[91;1m", "\033[00m"
    print(f"{red} {message}{reset}")
if __name__ == '__main__':
    # main requires the training & test file names, so take them from the
    # command line; calling main() with no arguments raises a TypeError.
    if len(sys.argv) != 3:
        sys.exit(f'usage: {sys.argv[0]} <training_csv> <test_csv>')
    main(sys.argv[1], sys.argv[2])