-
Notifications
You must be signed in to change notification settings - Fork 2
/
risky_smart_weights.py
248 lines (196 loc) · 10.9 KB
/
risky_smart_weights.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
import pandas as pd
import yfinance as yf
from datetime import datetime, timedelta, timezone
import random
def last_trading_day():
    """Return the latest day with S&P 500 close data, formatted 'YYYY-MM-DD'.

    The current time is read on a fixed UTC-5 clock labelled 'EST'
    (daylight saving time is not modelled). Before 21:00 on that clock, or
    on a Saturday/Sunday, the query window ends one business day earlier.

    Returns:
        The last index entry of the ^GSPC history returned by yfinance for
        the window, as a 'YYYY-MM-DD' string.

    Raises:
        IndexError: if yfinance returns no rows with close data for the
            window (the history index is empty).
    """
    rightnow = datetime.now(timezone(timedelta(hours=-5), 'EST'))
    # US Markets close at 4pm, but afterhours trading ends at 8pm.
    # yFinance only gives the day's data after 8pm, so we wait until 9pm
    # before pulling data for the current day.
    data_ready_at = rightnow.replace(hour=21, minute=0, second=0, microsecond=0)
    too_early = rightnow < data_ready_at
    is_weekend = rightnow.weekday() >= 5
    delta = 1 if (too_early or is_weekend) else 0

    # Build the date window from the same UTC-5 clock used for the checks
    # above, so a host running in another timezone cannot disagree with
    # them about what "today" is.
    today = rightnow.replace(tzinfo=None)
    start_date = (today - timedelta(days=15)).strftime("%Y-%m-%d")
    end_date = (today - pd.tseries.offsets.BDay(delta)).strftime("%Y-%m-%d")

    # We can use the S&P 500's data to see the last day where we have data
    market_index = "^GSPC"
    market_hist = (
        yf.Ticker(market_index)
        .history(start=start_date, end=end_date)
        .filter(like="Close")
        .dropna()
    )
    latest_day = market_hist.index[-1]
    return latest_day.strftime("%Y-%m-%d")
# get_stock_beta (defined below, after get_weight_list) produces the beta of a specified ticker
# Inputs:
# stock_returns_series is a Series: the column of a dataframe holding the ticker's price returns
def get_weight_list(k):
    """Return k random, strictly positive weights that sum to 1.

    k distinct integers are drawn from 1..250 and each is divided by their
    total.

    Args:
        k: number of weights to generate.

    Returns:
        A list of k floats; an empty list when k is 0.

    Raises:
        ValueError: from random.sample when k is negative or larger than
            the pool of 250 values.
    """
    # Draw from 1..250 rather than 0..249: a drawn 0 would give a zero
    # weight, and for k == 1 a zero total and a ZeroDivisionError.
    draws = random.sample(range(1, 251), k)
    total = sum(draws)
    return [draw / total for draw in draws]
def get_stock_beta(stock_returns_series, marketVar, market_hist):
    """Return the covariance of a stock with the market, divided by marketVar.

    Args:
        stock_returns_series: Series of the stock's returns; it is aligned
            to market_hist's index when attached as a column.
        marketVar: value the covariance matrix is divided by.
        market_hist: DataFrame with a "Returns" column.

    Returns:
        The ("Returns", "stock") entry of the scaled covariance matrix.
    """
    # Work on a fresh two-column frame so market_hist is left untouched.
    paired_returns = market_hist[["Returns"]].assign(stock=stock_returns_series)
    scaled_cov = paired_returns.cov() / marketVar
    return scaled_cov.loc["Returns", "stock"]
# get_all_betas takes a list of tickers and returns a dict mapping every
# ticker in the list to its beta.
def get_all_betas(ticker_list, ticker_hist, marketVar, market_hist):
    """Return a dict mapping each ticker in ticker_list to its beta.

    For every ticker, the percent change of its 'Close' column in
    ticker_hist (scaled by 100) is passed to get_stock_beta together with
    marketVar and market_hist.
    """
    return {
        symbol: get_stock_beta(
            ticker_hist[symbol]['Close'].pct_change() * 100,
            marketVar,
            market_hist,
        )
        for symbol in ticker_list
    }
def get_option_interest(ticker):
    """Return the total open interest of in-the-money options for a ticker.

    Only the first expiration listed by yfinance for the ticker is used.
    The result is the sum of 'openInterest' over calls flagged
    'inTheMoney' plus the same sum over puts.

    Raises:
        IndexError: when the ticker has no listed expirations
            (stock.options is empty); import_options catches this to drop
            the ticker.
    """
    stock = yf.Ticker(ticker)
    chain = stock.option_chain(stock.options[0])
    # Use the chain's frames directly. The previous
    # pd.DataFrame().append(...) only copied them, and DataFrame.append no
    # longer exists in pandas 2.x.
    calls = chain.calls
    puts = chain.puts
    calls_interest = calls.loc[calls['inTheMoney'], 'openInterest'].sum()
    puts_interest = puts.loc[puts['inTheMoney'], 'openInterest'].sum()
    return calls_interest + puts_interest
def get_daily_volume(ticker, start, end, ticker_hist):
    """Return the mean 'Volume' of a ticker between start and end.

    Args:
        ticker: key selecting the ticker's sub-frame in ticker_hist.
        start: first date of the window; parsed with pd.to_datetime.
        end: last date of the window; parsed with pd.to_datetime.
        ticker_hist: DataFrame indexed by date, with per-ticker columns
            that include 'Volume'.

    Returns:
        The mean of the 'Volume' values in the window. Missing values are
        skipped by the mean rather than counted as zero.
    """
    # The former zero-filled copy of ticker_hist was never read, so it has
    # been removed; the mean is still taken over the unfilled data.
    window = ticker_hist[ticker].loc[pd.to_datetime(start):pd.to_datetime(end)]
    return window['Volume'].mean()
def import_options(tickerlist, opt_start, opt_end, tickerhist):
    """Return a dict of option open interest over mean daily volume.

    For each ticker, get_option_interest is divided by get_daily_volume
    over the opt_start..opt_end window. A ticker whose lookup raises
    IndexError is reported on stdout and left out of the result.
    """
    interest_per_volume = {}
    for ticker in tickerlist:
        try:
            interest_per_volume[ticker] = (
                get_option_interest(ticker)
                / get_daily_volume(ticker, opt_start, opt_end, tickerhist)
            )
        # yFinance's historical data is sometimes incomplete, so tickers
        # with insufficient data are dropped from the analysis here.
        except IndexError:
            print(f'Dropped {ticker} - no data')
    return interest_per_volume
def _simulate_weighted_portfolio(tickers, ticker_hist, totalspend, start_date, end_date):
    """Simulate one randomly weighted portfolio and score it.

    Args:
        tickers: tickers to hold; each gets a weight from get_weight_list.
        ticker_hist: per-ticker price history with a 'Close' column.
        totalspend: total amount split across the tickers by weight.
        start_date: first day of the analysis window ('YYYY-MM-DD').
        end_date: last day of the analysis window ('YYYY-MM-DD').

    Returns:
        (risk_coefficient, shares): risk_coefficient is
        (mean daily percent return * its standard deviation) ** 2 over the
        window; shares maps each ticker to the number of shares its
        allocation buys at the close on end_date.
    """
    portfolio = pd.DataFrame(index=pd.date_range(start=start_date, end=end_date))
    shares_at_end = {}
    weights = get_weight_list(len(tickers))
    for ticker, weight in zip(tickers, weights):
        allocation = weight * totalspend
        closes = ticker_hist[ticker].Close
        # Shares the allocation would have bought at the first close in the
        # downloaded history; used to value the position over the window.
        historical_shares = allocation / closes.iloc[0]
        # Same allocation, priced at the close on end_date; this is what
        # is reported back to the caller.
        shares_at_end[ticker] = allocation / closes.loc[end_date]
        window_closes = closes.loc[pd.to_datetime(start_date):pd.to_datetime(end_date)]
        portfolio[ticker] = historical_shares * window_closes

    # Drop days with no prices at all, then fill remaining gaps forwards
    # and backwards.
    portfolio.dropna(how='all', inplace=True)
    portfolio.ffill(inplace=True)
    portfolio.bfill(inplace=True)
    portfolio['Total Value'] = portfolio.sum(axis=1)
    portfolio['Percent Return'] = portfolio['Total Value'].pct_change() * 100

    std = portfolio['Percent Return'].std()
    avg_return = portfolio['Percent Return'].mean()
    return (avg_return * std) ** 2, shares_at_end


def generate_risky_portfolio(tickerlist: list, totalspend: int):
    """Pick a high-risk share allocation from tickerlist by random search.

    Steps:
      1. Download 6 months of daily history for the tickers.
      2. Compute each ticker's beta against the S&P 500 over the last 60
         days. With more than 10 tickers, keep whichever of the 10 lowest
         or 10 highest betas has the larger absolute mean.
      3. Rank tickers by option open interest over mean daily volume and
         keep the top 10. This is skipped when fewer tickers have options
         data than are in the beta list.
      4. Run 250 trials of randomly weighted portfolios for the beta list
         (and the options list when available), and keep the portfolio
         with the largest risk coefficient.

    Args:
        tickerlist: ticker symbols to consider.
        totalspend: total amount to allocate across the chosen tickers.

    Returns:
        (final_stocks_df, end_date): final_stocks_df is indexed by
        'Ticker' with a 'Shares' column; end_date is the 'YYYY-MM-DD'
        string returned by last_trading_day().
    """
    ticker_hist = yf.download(
        tickers=" ".join(tickerlist),
        # Download Data From the past 6 months
        period="6mo",
        interval="1d",
        group_by='ticker',
        threads=True,
    )

    # Data Cleanup
    ticker_hist.dropna(how='all', inplace=True)
    ticker_hist.ffill(inplace=True)
    ticker_hist.bfill(inplace=True)

    start_date = (datetime.now() - timedelta(days=60)).strftime("%Y-%m-%d")
    end_date = last_trading_day()

    # We will compare each stock's covariance to the S&P 500
    market_index = "^GSPC"
    market_hist = (
        yf.Ticker(market_index)
        .history(start=start_date, end=end_date)
        .filter(like="Close")
    )
    market_hist["Returns"] = market_hist["Close"].pct_change() * 100
    market_var = market_hist["Returns"].var()

    beta_dict = get_all_betas(tickerlist, ticker_hist, market_var, market_hist)
    beta_df = pd.DataFrame.from_dict(beta_dict, orient='index')
    beta_df.columns = ['Beta']
    beta_df.index.name = 'Ticker'

    # Sort from lowest beta to highest beta
    sorted_beta_df = beta_df.sort_values('Beta').copy()
    if len(sorted_beta_df) > 10:
        lower_bound = sorted_beta_df.iloc[:10]
        upper_bound = sorted_beta_df.iloc[-10:]
        avg_upper = upper_bound['Beta'].mean()
        avg_lower = lower_bound['Beta'].mean()
        # Keep the group whose mean beta has the larger absolute value.
        if abs(avg_upper) >= abs(avg_lower):
            beta_final_ticker_list = list(upper_bound.index)
        else:
            beta_final_ticker_list = list(lower_bound.index)
    else:
        beta_final_ticker_list = list(sorted_beta_df.index)

    # Options Analysis
    opt_start = (datetime.now() - timedelta(days=7)).strftime("%Y-%m-%d")
    opt_end = end_date
    option_interest_dict = import_options(tickerlist, opt_start, opt_end, ticker_hist)

    # None means the options analysis was skipped.
    opt_tickers = None
    if len(option_interest_dict) >= len(beta_final_ticker_list):
        # Dataframe pairing tickers with option interest over daily volume
        option_interest_df = pd.DataFrame.from_dict(option_interest_dict, orient='index')
        option_interest_df.columns = ['Option Interest / dailyVol']
        ranked = option_interest_df.sort_values('Option Interest / dailyVol').dropna()
        opt_tickers = list(ranked.tail(10).index)
        print('Found an option portfolio')
    else:
        print('Error - Insufficient options data for inputted tickers, skipping options analysis...')

    # Monte Carlo Analysis
    trials = 250
    candidate_lists = [beta_final_ticker_list]
    if opt_tickers is not None:
        candidate_lists.append(opt_tickers)

    # We compare portfolios from the beta and options methods and keep the
    # one with the largest risk coefficient, from whichever method.
    # NOTE(review): the source's indentation was lost; the options
    # simulation is assumed to run once per trial, after the beta
    # simulation. Confirm against the original file.
    best_coefficient = None
    best_shares = None
    for _ in range(trials):
        for tickers in candidate_lists:
            coefficient, shares = _simulate_weighted_portfolio(
                tickers, ticker_hist, totalspend, start_date, end_date
            )
            is_first = best_shares is None
            # A NaN score never beats a real one, and a real score always
            # replaces a NaN held from an earlier trial.
            replaces_nan = (
                not is_first
                and pd.isna(best_coefficient)
                and not pd.isna(coefficient)
            )
            if is_first or replaces_nan or coefficient > best_coefficient:
                best_coefficient = coefficient
                best_shares = shares

    # Create final output portfolio
    final_stocks_df = pd.DataFrame.from_dict(best_shares, orient='index')
    final_stocks_df.reset_index(inplace=True)
    final_stocks_df.columns = ['Ticker', 'Shares']
    final_stocks_df.set_index('Ticker', inplace=True)
    return (final_stocks_df, end_date)