-
Notifications
You must be signed in to change notification settings - Fork 86
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
yfinance to use for gathering datasets
- Loading branch information
1 parent
9d34012
commit f889e97
Showing
3 changed files
with
6,560 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,318 @@ | ||
{ | ||
"cells": [ | ||
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# **Loading stock market data using the yfinance library**" | |
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 1, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"import pandas as pd\n", | ||
"import yfinance as yf" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"### Find the ticker symbols here: https://stockanalysis.com/stocks/" | ||
] | ||
}, | ||
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# define the ticker symbol\n", | |
"ticker = 'NVDA'" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 3, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# define the time period\n", | ||
"start_date = '2015-01-01'\n", | ||
"end_date = '2024-11-21'" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 4, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stderr", | ||
"output_type": "stream", | ||
"text": [ | ||
"[*********************100%***********************] 1 of 1 completed\n" | ||
] | ||
}, | ||
{ | ||
"data": { | ||
"text/html": [ | ||
"<div>\n", | ||
"<style scoped>\n", | ||
" .dataframe tbody tr th:only-of-type {\n", | ||
" vertical-align: middle;\n", | ||
" }\n", | ||
"\n", | ||
" .dataframe tbody tr th {\n", | ||
" vertical-align: top;\n", | ||
" }\n", | ||
"\n", | ||
" .dataframe thead tr th {\n", | ||
" text-align: left;\n", | ||
" }\n", | ||
"\n", | ||
" .dataframe thead tr:last-of-type th {\n", | ||
" text-align: right;\n", | ||
" }\n", | ||
"</style>\n", | ||
"<table border=\"1\" class=\"dataframe\">\n", | ||
" <thead>\n", | ||
" <tr>\n", | ||
" <th>Price</th>\n", | ||
" <th>Adj Close</th>\n", | ||
" <th>Close</th>\n", | ||
" <th>High</th>\n", | ||
" <th>Low</th>\n", | ||
" <th>Open</th>\n", | ||
" <th>Volume</th>\n", | ||
" </tr>\n", | ||
" <tr>\n", | ||
" <th>Ticker</th>\n", | ||
" <th>NVDA</th>\n", | ||
" <th>NVDA</th>\n", | ||
" <th>NVDA</th>\n", | ||
" <th>NVDA</th>\n", | ||
" <th>NVDA</th>\n", | ||
" <th>NVDA</th>\n", | ||
" </tr>\n", | ||
" <tr>\n", | ||
" <th>Date</th>\n", | ||
" <th></th>\n", | ||
" <th></th>\n", | ||
" <th></th>\n", | ||
" <th></th>\n", | ||
" <th></th>\n", | ||
" <th></th>\n", | ||
" </tr>\n", | ||
" </thead>\n", | ||
" <tbody>\n", | ||
" <tr>\n", | ||
" <th>2015-01-02</th>\n", | ||
" <td>0.483177</td>\n", | ||
" <td>0.50325</td>\n", | ||
" <td>0.50700</td>\n", | ||
" <td>0.49525</td>\n", | ||
" <td>0.50325</td>\n", | ||
" <td>113680000</td>\n", | ||
" </tr>\n", | ||
" <tr>\n", | ||
" <th>2015-01-05</th>\n", | ||
" <td>0.475016</td>\n", | ||
" <td>0.49475</td>\n", | ||
" <td>0.50475</td>\n", | ||
" <td>0.49250</td>\n", | ||
" <td>0.50325</td>\n", | ||
" <td>197952000</td>\n", | ||
" </tr>\n", | ||
" <tr>\n", | ||
" <th>2015-01-06</th>\n", | ||
" <td>0.460614</td>\n", | ||
" <td>0.47975</td>\n", | ||
" <td>0.49600</td>\n", | ||
" <td>0.47925</td>\n", | ||
" <td>0.49550</td>\n", | ||
" <td>197764000</td>\n", | ||
" </tr>\n", | ||
" <tr>\n", | ||
" <th>2015-01-07</th>\n", | ||
" <td>0.459414</td>\n", | ||
" <td>0.47850</td>\n", | ||
" <td>0.48750</td>\n", | ||
" <td>0.47700</td>\n", | ||
" <td>0.48325</td>\n", | ||
" <td>321808000</td>\n", | ||
" </tr>\n", | ||
" <tr>\n", | ||
" <th>2015-01-08</th>\n", | ||
" <td>0.476696</td>\n", | ||
" <td>0.49650</td>\n", | ||
" <td>0.49950</td>\n", | ||
" <td>0.48375</td>\n", | ||
" <td>0.48400</td>\n", | ||
" <td>283780000</td>\n", | ||
" </tr>\n", | ||
" </tbody>\n", | ||
"</table>\n", | ||
"</div>" | ||
], | ||
"text/plain": [ | ||
"Price Adj Close Close High Low Open Volume\n", | ||
"Ticker NVDA NVDA NVDA NVDA NVDA NVDA\n", | ||
"Date \n", | ||
"2015-01-02 0.483177 0.50325 0.50700 0.49525 0.50325 113680000\n", | ||
"2015-01-05 0.475016 0.49475 0.50475 0.49250 0.50325 197952000\n", | ||
"2015-01-06 0.460614 0.47975 0.49600 0.47925 0.49550 197764000\n", | ||
"2015-01-07 0.459414 0.47850 0.48750 0.47700 0.48325 321808000\n", | ||
"2015-01-08 0.476696 0.49650 0.49950 0.48375 0.48400 283780000" | ||
] | ||
}, | ||
"execution_count": 4, | ||
"metadata": {}, | ||
"output_type": "execute_result" | ||
} | ||
], | ||
"source": [ | ||
"# download the data\n", | ||
"df_nvda = yf.download(ticker, start=start_date, end=end_date)\n", | ||
"df_nvda.head()" | ||
] | ||
}, | ||
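{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Definition of each column in the dataset\n", | |
"- **Date**: The trading date of the row (used as the DataFrame index)\n", | |
"- **Open**: The price of the stock at market open on that date\n", | |
"- **High**: The highest price of the stock during that trading day\n", | |
"- **Low**: The lowest price of the stock during that trading day\n", | |
"- **Close**: The price of the stock at market close on that date\n", | |
"- **Adj Close**: The closing price adjusted for corporate actions such as stock splits and dividends\n", | |
"- **Volume**: The number of shares traded on that date\n", | |
"- **Ticker**: The ticker symbol of the stock (appears as the second header row of the downloaded data)\n", | |
"- **Name**: The name of the stock (not part of the downloaded price data shown above)\n", | |
"- **Sector**: The sector of the stock (not part of the downloaded price data shown above)\n", | |
"- **Industry**: The industry of the stock (not part of the downloaded price data shown above)\n", | |
"- **Country**: The country of the stock (not part of the downloaded price data shown above)" | |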
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 5, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# save the data in a csv file\n", | ||
"df_nvda.to_csv('datasets/' + ticker + '_yfinance.csv')" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 7, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stderr", | ||
"output_type": "stream", | ||
"text": [ | ||
"[*********************100%***********************] 5 of 5 completed\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"# download the data from multiple tickers\n", | ||
"tickers = ['AAPL', 'MSFT', 'NVDA', 'GOOGL', 'AMZN']\n", | ||
"start_date = '2010-01-01'\n", | ||
"end_date = '2024-11-21'\n", | ||
"df = yf.download(tickers, start=start_date, end=end_date)\n", | ||
"df.head()\n", | ||
"# save the data in a csv file\n", | ||
"df.to_csv('datasets/' + '_'.join(tickers) + '_yfinance.csv')" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 8, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"<class 'pandas.core.frame.DataFrame'>\n", | ||
"DatetimeIndex: 3747 entries, 2010-01-04 to 2024-11-20\n", | ||
"Data columns (total 30 columns):\n", | ||
" # Column Non-Null Count Dtype \n", | ||
"--- ------ -------------- ----- \n", | ||
" 0 (Adj Close, AAPL) 3747 non-null float64\n", | ||
" 1 (Adj Close, AMZN) 3747 non-null float64\n", | ||
" 2 (Adj Close, GOOGL) 3747 non-null float64\n", | ||
" 3 (Adj Close, MSFT) 3747 non-null float64\n", | ||
" 4 (Adj Close, NVDA) 3747 non-null float64\n", | ||
" 5 (Close, AAPL) 3747 non-null float64\n", | ||
" 6 (Close, AMZN) 3747 non-null float64\n", | ||
" 7 (Close, GOOGL) 3747 non-null float64\n", | ||
" 8 (Close, MSFT) 3747 non-null float64\n", | ||
" 9 (Close, NVDA) 3747 non-null float64\n", | ||
" 10 (High, AAPL) 3747 non-null float64\n", | ||
" 11 (High, AMZN) 3747 non-null float64\n", | ||
" 12 (High, GOOGL) 3747 non-null float64\n", | ||
" 13 (High, MSFT) 3747 non-null float64\n", | ||
" 14 (High, NVDA) 3747 non-null float64\n", | ||
" 15 (Low, AAPL) 3747 non-null float64\n", | ||
" 16 (Low, AMZN) 3747 non-null float64\n", | ||
" 17 (Low, GOOGL) 3747 non-null float64\n", | ||
" 18 (Low, MSFT) 3747 non-null float64\n", | ||
" 19 (Low, NVDA) 3747 non-null float64\n", | ||
" 20 (Open, AAPL) 3747 non-null float64\n", | ||
" 21 (Open, AMZN) 3747 non-null float64\n", | ||
" 22 (Open, GOOGL) 3747 non-null float64\n", | ||
" 23 (Open, MSFT) 3747 non-null float64\n", | ||
" 24 (Open, NVDA) 3747 non-null float64\n", | ||
" 25 (Volume, AAPL) 3747 non-null int64 \n", | ||
" 26 (Volume, AMZN) 3747 non-null int64 \n", | ||
" 27 (Volume, GOOGL) 3747 non-null int64 \n", | ||
" 28 (Volume, MSFT) 3747 non-null int64 \n", | ||
" 29 (Volume, NVDA) 3747 non-null int64 \n", | ||
"dtypes: float64(25), int64(5)\n", | ||
"memory usage: 907.5 KB\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"df.info()" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "data_gathering", | ||
"language": "python", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.10.0" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 2 | ||
} |
Oops, something went wrong.