forked from gianlucadetommaso/volatile
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathyahoo_finance.py
123 lines (96 loc) · 3.77 KB
/
yahoo_finance.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
import json
from typing import Dict, Optional
import aiohttp
import numpy as np
import pandas as pd
import logging
logger = logging.getLogger(__name__)
async def download_ticker_sector_industry(session: aiohttp.ClientSession, ticker: str) -> Optional[Dict]:
    """
    Scrape sector and industry information for a single ticker from its Yahoo! Finance quote page.

    Parameters
    ----------
    session: aiohttp.ClientSession
        Open HTTP session used to perform the request.
    ticker: str
        Ticker for which to download sector/industry information.

    Returns
    -------
    Optional[Dict]
        ``{"SYMBOL": ..., "SECTOR": ..., "INDUSTRY": ...}`` on success, or ``None`` on any
        failure (network error, page-layout change, or missing sector/industry fields).
    """
    try:
        async with session.get("https://finance.yahoo.com/quote/" + ticker) as response:
            html = await response.text()
            # The quote page embeds its state as a JS assignment `root.App.main = {...};`.
            # Slice out the JSON object between the assignment and the trailing `(this)` call.
            json_str = html.split("root.App.main =")[1].split("(this)")[0].split(";\n}")[0].strip()
            info = json.loads(json_str)["context"]["dispatcher"]["stores"]["QuoteSummaryStore"]["summaryProfile"]
            # Explicit check instead of `assert`: asserts are stripped under `python -O`,
            # which would let empty sector/industry strings slip through.
            if not info["sector"] or not info["industry"]:
                raise ValueError("empty sector or industry")
            return {"SYMBOL": ticker, "SECTOR": info["sector"], "INDUSTRY": info["industry"]}
    except Exception as e:
        logger.warning(f"Error downloading info for {ticker=}: {e}")
        return None
async def _download_single_ticker_chart_data(
    session: aiohttp.ClientSession, ticker: str, start: int, end: int, interval: str = "1d"
) -> Optional[dict]:
    """
    Download historical chart data for a single ticker from the Yahoo! Finance v8 API.

    Parameters
    ----------
    session: aiohttp.ClientSession
        Open HTTP session used to perform the request.
    ticker: str
        Ticker for which to download historical information.
    start: int
        Start download data from this timestamp date.
    end: int
        End download data at this timestamp date.
    interval: str
        Frequency between data.

    Returns
    -------
    Optional[dict]
        ``{"ticker": ..., "quotes": ..., "currency": ...}`` on success, ``None`` if the
        response could not be parsed.

    Raises
    ------
    RuntimeError
        If Yahoo! Finance reports itself as down.
    """
    url = f"https://query1.finance.yahoo.com/v8/finance/chart/{ticker}"
    params = dict(period1=start, period2=end, interval=interval.lower(), includePrePost="false")
    async with session.get(url, params=params) as response:
        data_text = await response.text()
        if "Will be right back" in data_text:
            raise RuntimeError("*** YAHOO! FINANCE is currently down! ***\n")
        else:
            response_json = await response.json()
            try:
                return {
                    "ticker": ticker,
                    "quotes": _parse_quotes(response_json),
                    "currency": extract_currency_from_chart_json(response_json),
                }
            except Exception as e:
                # Prefer the API's own error payload over the local exception when present.
                if "error" in response_json.get("chart", {}):
                    e = response_json["chart"]["error"]
                # Include the error detail; previously it was computed but never logged.
                logger.warning(f"Downloading chart data for ticker {ticker} threw error: {e}")
                return None
def extract_currency_from_chart_json(response_json):
    """Return the currency code stored in the metadata of a Yahoo! Finance chart response."""
    first_result = response_json["chart"]["result"][0]
    return first_result["meta"]["currency"]
def _parse_quotes(response_json: dict) -> pd.DataFrame:
"""
It creates a data frame of adjusted closing prices and volumes. If no adjusted closing
price is available, it sets it equal to closing price.
Parameters
----------
data: dict
Data containing historical information of corresponding stock.
"""
data = response_json["chart"]["result"][0]
quotes = {}
timestamps = data["timestamp"]
indicators = data["indicators"]
ohlc = indicators["quote"][0]
closes = ohlc["close"]
quotes["Volume"] = ohlc["volume"]
try:
adjclose = indicators["adjclose"][0]["adjclose"]
except (KeyError, IndexError):
adjclose = closes
# fix NaNs in the second-last entry of adjusted closing prices
if adjclose[-2] is None:
adjclose[-2] = adjclose[-1]
assert (np.array(adjclose) > 0).all()
quotes["Adj Close"] = adjclose
quotes = pd.DataFrame(quotes)
quotes.index = pd.to_datetime(timestamps, unit="s").date
quotes.sort_index(inplace=True)
quotes = quotes.loc[~quotes.index.duplicated(keep="first")]
return quotes