#!/usr/bin/env python
# evaluation.py -- forked from gianlucadetommaso/volatile
from argparse import SUPPRESS, ArgumentDefaultsHelpFormatter, ArgumentParser
from copy import deepcopy

import numpy as np
import tensorflow_probability as tfp

# the star imports provide the data and model utilities used below, e.g.
# download_tickers_info, convert_currency, extract_hierarchical_info,
# estimate_logprice_statistics, estimate_clusters, train, train_s,
# s_model and define_model
from volatile import *
from models import *

# alias used in the type hints below
tfd = tfp.distributions


def second_moment_loss(tt: np.ndarray, mu: np.ndarray, sigma: np.ndarray, logp: np.ndarray) -> float:
    """
    Absolute deviation of the standardized scores' second moment from 1. Because the log-price likelihood
    is Gaussian, standardizing the test data should give a second moment (i.e. squared expectation plus
    variance) of 1; a well-calibrated model therefore scores close to 0.

    Parameters
    ----------
    tt: np.ndarray
        Array of times.
    mu: np.ndarray
        Mean parameters.
    sigma: np.ndarray
        Standard deviation parameters.
    logp: np.ndarray
        Log-prices at stock-level.
    """
    est, std = estimate_logprice_statistics(mu, sigma, tt)
    scores = (est - logp) / std
    return np.abs(np.mean(scores ** 2) - 1)
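
# Intuition for the metric above (a hypothetical sketch, not part of the evaluation):
# perfectly calibrated standardized scores are standard normal, so their second
# moment is 1 and the loss vanishes. Overconfident predictions (std too small)
# push the second moment above 1; underconfident ones pull it below.
#
#     rng = np.random.default_rng(0)
#     scores = rng.standard_normal(100_000)
#     np.abs(np.mean(scores ** 2) - 1)          # close to the ideal 0
#     np.abs(np.mean((2 * scores) ** 2) - 1)    # ~3: scores twice as wide as claimed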


def negative_loglikelihood(model: tfd.JointDistributionSequentialAutoBatched, params: tuple, logp: np.ndarray) -> float:
    """
    Returns the negative log-likelihood of the observed log-prices under the model.

    Parameters
    ----------
    model: tfd.JointDistributionSequentialAutoBatched
        Probabilistic model.
    params: tuple
        Parameters of the model.
    logp: np.ndarray
        Log-prices at stock-level.
    """
    # `log_prob_parts` scores each component of the joint distribution; the last
    # part is the log-likelihood of `logp` conditioned on `params`.
    return -model.log_prob_parts(list(params) + [logp])[-1].numpy()
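
# Shape of a typical call (a hypothetical sketch mirroring the usage below):
#
#     params = train_s(train_log_price, info)    # fitted parameter tuple
#     pred_model = s_model(info_pred)            # model over the held-out window
#     nll = negative_loglikelihood(pred_model, params, test_log_price)
#
# Lower values mean the held-out log-prices are more probable under the fit.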


if __name__ == "__main__":
    cli = ArgumentParser("Volatile: your day-to-day trading companion.", formatter_class=ArgumentDefaultsHelpFormatter)
    cli.add_argument("-s", "--symbols", type=str, nargs="+", help=SUPPRESS)
    cli.add_argument("--days", type=int, default=5, help="Number of most recent days to hold out for validation.")
    args = cli.parse_args()

    print("\nDownloading all available closing prices in the last year...")
    if args.symbols is None:
        with open("symbols_list.txt", "r") as file:
            args.symbols = file.readlines()[0].split(" ")
    data = download_tickers_info(args.symbols)
    tickers = data["tickers"]
    log_price = np.log(data["price"])

    # convert currencies to the most frequent one
    for i, currency in enumerate(data["currencies"]):
        if currency != data["default_currency"]:
            log_price[i] = convert_currency(log_price[i], np.array(data["exchange_rates"][currency]), type="forward")

    num_stocks = log_price.shape[0]
    # number of training days, i.e. everything except the held-out window
    t = log_price[:, : -args.days].shape[1]

    ## independent stock model
    print("\ns_2 model evaluation...")
    order = 2
    info = dict()
    # polynomial time features: row k holds (time)**k for k = 0, ..., order
    info["tt"] = (np.linspace(1 / t, 1, t) ** np.arange(order + 1).reshape(-1, 1)).astype("float32")
    # scales for the polynomial coefficients, decreasing with the order
    info["order_scale"] = np.linspace(1 / (order + 1), 1, order + 1)[::-1].astype("float32")[None, :]
    info["num_stocks"] = num_stocks
    # the same features evaluated at the held-out times, which extend beyond 1
    tt_pred = ((1 + (np.arange(1, 1 + args.days) / t)) ** np.arange(order + 1).reshape(-1, 1)).astype("float32")

    params = train_s(log_price[:, : -args.days], info)
    phi, psi = params
    sm_loss = second_moment_loss(tt_pred, phi.numpy(), psi.numpy(), log_price[:, -args.days :])

    info_pred = deepcopy(info)
    info_pred["tt"] = tt_pred
    pred_model = s_model(info_pred)
    nllkd = negative_loglikelihood(pred_model, params, log_price[:, -args.days :])
    print("Second moment loss: ", sm_loss)
    print("Negative log-likelihood: ", nllkd)

    ## hierarchical model without clusters
    print("\nmsis_2 model evaluation...")
    order = 2
    info = extract_hierarchical_info(data["sectors"], data["industries"])
    info["tt"] = (np.linspace(1 / t, 1, t) ** np.arange(order + 1).reshape(-1, 1)).astype("float32")
    info["order_scale"] = np.linspace(1 / (order + 1), 1, order + 1)[::-1].astype("float32")[None, :]
    tt_pred = ((1 + (np.arange(1, 1 + args.days) / t)) ** np.arange(order + 1).reshape(-1, 1)).astype("float32")

    params = train(log_price[:, : -args.days], info)
    # the stock-level mean and scale parameters are the last two in the tuple
    phi, psi = params[-2:]
    sm_loss = second_moment_loss(tt_pred, phi.numpy(), psi.numpy(), log_price[:, -args.days :])

    info_pred = deepcopy(info)
    info_pred["tt"] = tt_pred
    pred_model = define_model(info_pred)
    nllkd = negative_loglikelihood(pred_model, params, log_price[:, -args.days :])
    print("Second moment loss: ", sm_loss)
    print("Negative log-likelihood: ", nllkd)

    ## hierarchical model with clusters
    print("\nmsis_mcs_2 model evaluation...")

    ## estimate clusters from a high-order fit of the stock-level coefficients
    order = 51
    info = extract_hierarchical_info(data["sectors"], data["industries"])
    info["tt"] = (np.linspace(1 / t, 1, t) ** np.arange(order + 1).reshape(-1, 1)).astype("float32")
    info["order_scale"] = np.linspace(1 / (order + 1), 1, order + 1)[::-1].astype("float32")[None, :]
    phi_m, psi_m, phi_s, psi_s, phi_i, psi_i, phi, psi = train(log_price[:, : -args.days], info, num_steps=50000)
    clusters_id = estimate_clusters(tickers, phi.numpy(), info["tt"])

    ## evaluate an order-2 model that includes the estimated clusters
    order = 2
    info = extract_hierarchical_info(data["sectors"], data["industries"])
    info["tt"] = (np.linspace(1 / t, 1, t) ** np.arange(order + 1).reshape(-1, 1)).astype("float32")
    info["order_scale"] = np.linspace(1 / (order + 1), 1, order + 1)[::-1].astype("float32")[None, :]
    info["clusters_id"] = clusters_id
    tt_pred = ((1 + (np.arange(1, 1 + args.days) / t)) ** np.arange(order + 1).reshape(-1, 1)).astype("float32")

    params = train(log_price[:, : -args.days], info)
    phi, psi, chi_m, xi_m, chi_c, xi_c, chi, xi = params[6:]
    # stock-level statistics combine the hierarchical (phi, psi) and cluster (chi, xi) terms
    sm_loss = second_moment_loss(tt_pred, (phi + chi).numpy(), (psi + xi).numpy(), log_price[:, -args.days :])

    info_pred = deepcopy(info)
    info_pred["tt"] = tt_pred
    pred_model = define_model(info_pred)
    nllkd = negative_loglikelihood(pred_model, params, log_price[:, -args.days :])
    print("Second moment loss: ", sm_loss)
    print("Negative log-likelihood: ", nllkd)