import os import pandas as pd import numpy as np import math import datetime as dt import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error, mean_absolute_error, explained_variance_score, r2_score from sklearn.metrics import mean_poisson_deviance, mean_gamma_deviance, accuracy_score from sklearn.preprocessing import MinMaxScaler
import tensorflow as tf from tensorflow.keras.models import Sequential from tensorflow.keras.layers import Dense, Dropout from tensorflow.keras.layers import LSTM
import matplotlib.pyplot as plt from itertools import cycle import plotly.graph_objects as go import plotly.express as px from plotly.subplots import make_subplots
maindf=pd.read_csv('../input/bitcoin-stock-data-sept-17-2014-august-24-2021/BTC-USD.csv') print('Total number of days present in the dataset: ',maindf.shape[0]) print('Total number of fields present in the dataset: ',maindf.shape[1])
Total number of days present in the dataset: 2713 Total number of fields present in the dataset: 7 maindf.shape (2713, 7) maindf.head()
Date Open High Low Close Adj Close Volume 0 2014-09-17 465.864014 468.174011 452.421997 457.334015 457.334015 21056800 1 2014-09-18 456.859985 456.859985 413.104004 424.440002 424.440002 34483200 2 2014-09-19 424.102997 427.834991 384.532013 394.795990 394.795990 37919700 3 2014-09-20 394.673004 423.295990 389.882996 408.903992 408.903992 36863600 4 2014-09-21 408.084991 412.425995 393.181000 398.821014 398.821014 26580100 maindf.tail()
Date Open High Low Close Adj Close Volume 2708 2022-02-15 42586.464844 44667.218750 42491.035156 44575.203125 44575.203125 22721659051 2709 2022-02-16 44578.277344 44578.277344 43456.691406 43961.859375 43961.859375 19792547657 2710 2022-02-17 43937.070313 44132.972656 40249.371094 40538.011719 40538.011719 26246662813 2711 2022-02-18 40552.132813 40929.152344 39637.617188 40030.976563 40030.976563 23310007704 2712 2022-02-19 40022.132813 40246.027344 40010.867188 40126.429688 40126.429688 22263900160 maindf.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 2713 entries, 0 to 2712 Data columns (total 7 columns):
0 Date 2713 non-null object
1 Open 2713 non-null float64
2 High 2713 non-null float64
3 Low 2713 non-null float64
4 Close 2713 non-null float64
5 Adj Close 2713 non-null float64
6 Volume 2713 non-null int64
dtypes: float64(5), int64(1), object(1)
maindf.describe()
Open High Low Close Adj Close Volume count 2713.000000 2713.000000 2713.000000 2713.000000 2713.000000 2.713000e+03 mean 11311.041069 11614.292482 10975.555057 11323.914637 11323.914637 1.470462e+10 std 16106.428891 16537.390649 15608.572560 16110.365010 16110.365010 2.001627e+10 min 176.897003 211.731003 171.509995 178.102997 178.102997 5.914570e+06 25% 606.396973 609.260986 604.109985 606.718994 606.718994 7.991080e+07 50% 6301.569824 6434.617676 6214.220215 6317.609863 6317.609863 5.098183e+09 75% 10452.399414 10762.644531 10202.387695 10462.259766 10462.259766 2.456992e+10 max 67549.734375 68789.625000 66382.062500 67566.828125 67566.828125 3.509679e+11 #Checking for Null Values
maindf=maindf.dropna() print('Null Values:',maindf.isnull().values.sum()) print('NA values:',maindf.isnull().values.any())
Null Values: 0 NA values: False #Final shape of the dataset after dealing with null values maindf.shape
(2713, 7)
sd=maindf.iloc[0][0] ed=maindf.iloc[-1][0] print('Starting Date',sd) print('Ending Date',ed)
Starting Date 2014-09-17 Ending Date 2022-02-19
StockPrice Analysis from Start maindf['Date'] = pd.to_datetime(maindf['Date'], format='%Y-%m-%d')
y_2014 = maindf.loc[(maindf['Date'] >= '2014-09-17') & (maindf['Date'] < '2014-12-31')] y_2014.drop(y_2014[['Adj Close','Volume']],axis=1)
Date Open High Low Close 0 2014-09-17 465.864014 468.174011 452.421997 457.334015 1 2014-09-18 456.859985 456.859985 413.104004 424.440002 2 2014-09-19 424.102997 427.834991 384.532013 394.795990 3 2014-09-20 394.673004 423.295990 389.882996 408.903992 4 2014-09-21 408.084991 412.425995 393.181000 398.821014 ... ... ... ... ... ... 100 2014-12-26 319.152008 331.424011 316.627014 327.924011 101 2014-12-27 327.583008 328.911011 312.630005 315.863007 102 2014-12-28 316.160004 320.028015 311.078003 317.239014 103 2014-12-29 317.700989 320.266998 312.307007 312.670013 104 2014-12-30 312.718994 314.808990 309.372986 310.737000 105 rows × 5 columns #Grouping By Mountwise monthvise= y_2014.groupby(y_2014['Date'].dt.strftime('%B'))[['Open','Close']].mean() new_order = ['January', 'February', 'March', 'April', 'May', 'June', 'July', 'August', 'September', 'October', 'November', 'December'] monthvise = monthvise.reindex(new_order, axis=0) monthvise
Date Open Close January NaN NaN February NaN NaN March NaN NaN April NaN NaN May NaN NaN June NaN NaN July NaN NaN August NaN NaN September 412.654003 407.182428 October 365.748000 364.148873 November 364.850235 366.099799 December 344.146864 341.970366
#PLotting Bargraph fig = go.Figure()
fig.add_trace(go.Bar( x=monthvise.index, y=monthvise['Open'], name='Stock Open Price', marker_color='crimson' )) fig.add_trace(go.Bar( x=monthvise.index, y=monthvise['Close'], name='Stock Close Price', marker_color='lightsalmon' ))
fig.update_layout(barmode='group', xaxis_tickangle=-45, title='Monthwise comparision between Stock open and close price') fig.show()
Group By Months About Lows and Highs y_2014.groupby(y_2014['Date'].dt.strftime('%B'))['Low'].min() monthvise_high = y_2014.groupby(maindf['Date'].dt.strftime('%B'))['High'].max() monthvise_high = monthvise_high.reindex(new_order, axis=0)
monthvise_low = y_2014.groupby(y_2014['Date'].dt.strftime('%B'))['Low'].min() monthvise_low = monthvise_low.reindex(new_order, axis=0)
fig = go.Figure() fig.add_trace(go.Bar( x=monthvise_high.index, y=monthvise_high, name='Stock high Price', marker_color='rgb(0, 153, 204)' )) fig.add_trace(go.Bar( x=monthvise_low.index, y=monthvise_low, name='Stock low Price', marker_color='rgb(255, 128, 0)' )) fig.update_layout(barmode='group', title=' Monthwise High and Low stock price') fig.show()
names = cycle(['Stock Open Price','Stock Close Price','Stock High Price','Stock Low Price'])
fig = px.line(y_2014, x=y_2014.Date, y=[y_2014['Open'], y_2014['Close'], y_2014['High'], y_2014['Low']], labels={'Date': 'Date','value':'Stock value'}) fig.update_layout(title_text='Stock analysis chart', font_size=15, font_color='black',legend_title_text='Stock Parameters') fig.for_each_trace(lambda t: t.update(name = next(names))) fig.update_xaxes(showgrid=False) fig.update_yaxes(showgrid=False)
fig.show() #Using Close Data for LSTM closedf = maindf[['Date','Close']] print("Shape of close dataframe:", closedf.shape) Shape of close dataframe: (2713, 2) fig = px.line(closedf, x=closedf.Date, y=closedf.Close,labels={'date':'Date','close':'Close Stock'}) fig.update_traces(marker_line_width=2, opacity=0.8, marker_line_color='orange') fig.update_layout(title_text='Whole period of timeframe of Bitcoin close price 2014-2022', plot_bgcolor='white', font_size=15, font_color='black') fig.update_xaxes(showgrid=False) fig.update_yaxes(showgrid=False) fig.show() closedf = closedf[closedf['Date'] > '2021-02-19'] close_stock = closedf.copy() print("Total data for prediction: ",closedf.shape[0]) Total data for prediction: 365 closedf
Date Close
2348 2021-02-20 56099.519531 2349 2021-02-21 57539.945313 2350 2021-02-22 54207.320313 2351 2021-02-23 48824.425781 2352 2021-02-24 49705.332031 ... ... ... 2708 2022-02-15 44575.203125 2709 2022-02-16 43961.859375 2710 2022-02-17 40538.011719 2711 2022-02-18 40030.976563 2712 2022-02-19 40126.429688 365 rows × 2 columns fig = px.line(closedf, x=closedf.Date, y=closedf.Close,labels={'date':'Date','close':'Close Stock'}) fig.update_traces(marker_line_width=2, opacity=0.8, marker_line_color='orange') fig.update_layout(title_text='Considered period to predict Bitcoin close price', plot_bgcolor='white', font_size=15, font_color='black') fig.update_xaxes(showgrid=False) fig.update_yaxes(showgrid=False) fig.show() #Deleting date column and normalizing using MinMax Scaler
del closedf['Date'] scaler=MinMaxScaler(feature_range=(0,1)) closedf=scaler.fit_transform(np.array(closedf).reshape(-1,1)) print(closedf.shape)
(365, 1)
training_size=int(len(closedf)*0.60) test_size=len(closedf)-training_size train_data,test_data=closedf[0:training_size,:],closedf[training_size:len(closedf),:1] print("train_data: ", train_data.shape) print("test_data: ", test_data.shape)
train_data: (219, 1) test_data: (146, 1) #Transforming the Close price based on Time-series-analysis forecasting requirement , Here we will take 15
def create_dataset(dataset, time_step=1): dataX, dataY = [], [] for i in range(len(dataset)-time_step-1): a = dataset[i:(i+time_step), 0] ###i=0, 0,1,2,3-----99 100 dataX.append(a) dataY.append(dataset[i + time_step, 0]) return np.array(dataX), np.array(dataY)
time_step = 15
#Using Train Test Split Model X_train, y_train = create_dataset(train_data, time_step) X_test, y_test = create_dataset(test_data, time_step)
print("X_train: ", X_train.shape) print("y_train: ", y_train.shape) print("X_test: ", X_test.shape) print("y_test", y_test.shape)
X_train: (203, 15) y_train: (203,) X_test: (130, 15) y_test (130,)
X_train =X_train.reshape(X_train.shape[0],X_train.shape[1] , 1) X_test = X_test.reshape(X_test.shape[0],X_test.shape[1] , 1)
print("X_train: ", X_train.shape) print("X_test: ", X_test.shape)
X_train: (203, 15, 1) X_test: (130, 15, 1)
model=Sequential()
model.add(LSTM(10,input_shape=(None,1),activation="relu"))
model.add(Dense(1))
model.compile(loss="mean_squared_error",optimizer="adam") Storing Input of LSTM as history history = model.fit(X_train,y_train,validation_data=(X_test,y_test),epochs=200,batch_size=32,verbose=1)
Epoch 1/200 7/7 [==============================] - 3s 180ms/step - loss: 0.1612 - val_loss: 0.2229 Epoch 2/200 7/7 [==============================] - 0s 55ms/step - loss: 0.1433 - val_loss: 0.1760 Epoch 3/200 7/7 [==============================] - 0s 61ms/step - loss: 0.1147 - val_loss: 0.1310
Epoch 197/200 7/7 [==============================] - 0s 39ms/step - loss: 0.0033 - val_loss: 0.0028 Epoch 198/200 7/7 [==============================] - 0s 33ms/step - loss: 0.0035 - val_loss: 0.0027 Epoch 199/200 7/7 [==============================] - 0s 37ms/step - loss: 0.0031 - val_loss: 0.0027 Epoch 200/200 7/7 [==============================] - 0s 38ms/step - loss: 0.0038 - val_loss: 0.0028
import matplotlib.pyplot as plt
loss = history.history['loss'] val_loss = history.history['val_loss']
epochs = range(len(loss))
plt.plot(epochs, loss, 'r', label='Training loss') plt.plot(epochs, val_loss, 'b', label='Validation loss') plt.title('Training and validation loss') plt.legend(loc=0) plt.figure() plt.show()
train_predict=model.predict(X_train) test_predict=model.predict(X_test) train_predict.shape, test_predict.shape ((203, 1), (130, 1))
train_predict = scaler.inverse_transform(train_predict) test_predict = scaler.inverse_transform(test_predict) original_ytrain = scaler.inverse_transform(y_train.reshape(-1,1)) original_ytest = scaler.inverse_transform(y_test.reshape(-1,1))
print("Train data RMSE: ", math.sqrt(mean_squared_error(original_ytrain,train_predict))) print("Train data MSE: ", mean_squared_error(original_ytrain,train_predict)) print("Train data MAE: ", mean_absolute_error(original_ytrain,train_predict)) print("-------------------------------------------------------------------------------------") print("Test data RMSE: ", math.sqrt(mean_squared_error(original_ytest,test_predict))) print("Test data MSE: ", mean_squared_error(original_ytest,test_predict)) print("Test data MAE: ", mean_absolute_error(original_ytest,test_predict))
Train data RMSE: 2132.5617172023058 Train data MSE: 4547819.477676847 Train data MAE: 1696.3409213608377
Test data RMSE: 2009.5443706729727 Test data MSE: 4038268.5777034336 Test data MAE: 1557.741887036538
print("Train data explained variance regression score:", explained_variance_score(original_ytrain, train_predict)) print("Test data explained variance regression score:", explained_variance_score(original_ytest, test_predict))
Train data explained variance regression score: 0.9480010690935174 Test data explained variance regression score: 0.9529088548793871
print("Train data R2 score:", r2_score(original_ytrain, train_predict)) print("Test data R2 score:", r2_score(original_ytest, test_predict))
Train data R2 score: 0.9477311182484093 Test data R2 score: 0.9505016260136052
Regression Loss Mean Gamma deviance regression loss (MGD) and Mean Poisson deviance regression loss (MPD)
print("Train data MGD: ", mean_gamma_deviance(original_ytrain, train_predict)) print("Test data MGD: ", mean_gamma_deviance(original_ytest, test_predict)) print("----------------------------------------------------------------------") print("Train data MPD: ", mean_poisson_deviance(original_ytrain, train_predict)) print("Test data MPD: ", mean_poisson_deviance(original_ytest, test_predict))
Train data MPD: 100.24364567170709 Test data MPD: 79.20758987231272
look_back=time_step trainPredictPlot = np.empty_like(closedf) trainPredictPlot[:, :] = np.nan trainPredictPlot[look_back:len(train_predict)+look_back, :] = train_predict print("Train predicted data: ", trainPredictPlot.shape)
#Shifting test predictions for plotting testPredictPlot = np.empty_like(closedf) testPredictPlot[:, :] = np.nan testPredictPlot[len(train_predict)+(look_back*2)+1:len(closedf)-1, :] = test_predict print("Test predicted data: ", testPredictPlot.shape) names = cycle(['Original close price','Train predicted close price','Test predicted close price'])
plotdf = pd.DataFrame({'date': close_stock['Date'], 'original_close': close_stock['Close'], 'train_predicted_close': trainPredictPlot.reshape(1,-1)[0].tolist(), 'test_predicted_close': testPredictPlot.reshape(1,-1)[0].tolist()})
fig = px.line(plotdf,x=plotdf['date'], y=[plotdf['original_close'],plotdf['train_predicted_close'], plotdf['test_predicted_close']], labels={'value':'Stock price','date': 'Date'}) fig.update_layout(title_text='Comparision between original close price vs predicted close price', plot_bgcolor='white', font_size=15, font_color='black', legend_title_text='Close Price') fig.for_each_trace(lambda t: t.update(name = next(names)))
fig.update_xaxes(showgrid=False) fig.update_yaxes(showgrid=False) fig.show()
Train predicted data: (365, 1) Test predicted data: (365, 1)
x_input=test_data[len(test_data)-time_step:].reshape(1,-1) temp_input=list(x_input) temp_input=temp_input[0].tolist()
from numpy import array
lst_output=[] n_steps=time_step i=0 pred_days = 30 while(i<pred_days):
if(len(temp_input)>time_step):
x_input=np.array(temp_input[1:])
#print("{} day input {}".format(i,x_input)) x_input = x_input.reshape(1,-1) x_input = x_input.reshape((1, n_steps, 1))
yhat = model.predict(x_input, verbose=0)
#print("{} day output {}".format(i,yhat)) temp_input.extend(yhat[0].tolist()) temp_input=temp_input[1:] #print(temp_input)
lst_output.extend(yhat.tolist())
i=i+1
else:
x_input = x_input.reshape((1, n_steps,1))
yhat = model.predict(x_input, verbose=0)
temp_input.extend(yhat[0].tolist())
lst_output.extend(yhat.tolist())
i=i+1
print("Output of predicted next days: ", len(lst_output))
Output of predicted next days: 30
last_days=np.arange(1,time_step+1) day_pred=np.arange(time_step+1,time_step+pred_days+1) print(last_days) print(day_pred) [ 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15] [16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45]
temp_mat = np.empty((len(last_days)+pred_days+1,1)) temp_mat[:] = np.nan temp_mat = temp_mat.reshape(1,-1).tolist()[0]
last_original_days_value = temp_mat next_predicted_days_value = temp_mat
last_original_days_value[0:time_step+1] = scaler.inverse_transform(closedf[len(closedf)-time_step:]).reshape(1,-1).tolist()[0] next_predicted_days_value[time_step+1:] = scaler.inverse_transform(np.array(lst_output).reshape(-1,1)).reshape(1,-1).tolist()[0]
new_pred_plot = pd.DataFrame({ 'last_original_days_value':last_original_days_value, 'next_predicted_days_value':next_predicted_days_value }) names = cycle(['Last 15 days close price','Predicted next 30 days close price'])
fig = px.line(new_pred_plot,x=new_pred_plot.index, y=[new_pred_plot['last_original_days_value'], new_pred_plot['next_predicted_days_value']], labels={'value': 'Stock price','index': 'Timestamp'}) fig.update_layout(title_text='Compare last 15 days vs next 30 days', plot_bgcolor='white', font_size=15, font_color='black',legend_title_text='Close Price')
fig.for_each_trace(lambda t: t.update(name = next(names))) fig.update_xaxes(showgrid=False) fig.update_yaxes(showgrid=False) fig.show()
lstmdf=closedf.tolist() lstmdf.extend((np.array(lst_output).reshape(-1,1)).tolist()) lstmdf=scaler.inverse_transform(lstmdf).reshape(1,-1).tolist()[0]
names = cycle(['Close price'])
fig = px.line(lstmdf,labels={'value': 'Stock price','index': 'Timestamp'}) fig.update_layout(title_text='Plotting whole closing stock price with prediction', plot_bgcolor='white', font_size=15, font_color='black',legend_title_text='Stock')
fig.for_each_trace(lambda t: t.update(name = next(names)))
fig.update_xaxes(showgrid=False) fig.update_yaxes(showgrid=False) fig.show()