Skip to content

Commit

Permalink
yfinance to use for gathering datasets
Browse files Browse the repository at this point in the history
  • Loading branch information
AammarTufail committed Nov 22, 2024
1 parent 9d34012 commit f889e97
Show file tree
Hide file tree
Showing 3 changed files with 6,560 additions and 0 deletions.
318 changes: 318 additions & 0 deletions 13_data_gathering/04_yfinance.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,318 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# **Loading stock market data using yfiance library**"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import yfinance as yf"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Find the ticker symbols here: https://stockanalysis.com/stocks/"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"# ticker define\n",
"ticker = 'NVDA'"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"# define the time period\n",
"start_date = '2015-01-01'\n",
"end_date = '2024-11-21'"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"[*********************100%***********************] 1 of 1 completed\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead tr th {\n",
" text-align: left;\n",
" }\n",
"\n",
" .dataframe thead tr:last-of-type th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr>\n",
" <th>Price</th>\n",
" <th>Adj Close</th>\n",
" <th>Close</th>\n",
" <th>High</th>\n",
" <th>Low</th>\n",
" <th>Open</th>\n",
" <th>Volume</th>\n",
" </tr>\n",
" <tr>\n",
" <th>Ticker</th>\n",
" <th>NVDA</th>\n",
" <th>NVDA</th>\n",
" <th>NVDA</th>\n",
" <th>NVDA</th>\n",
" <th>NVDA</th>\n",
" <th>NVDA</th>\n",
" </tr>\n",
" <tr>\n",
" <th>Date</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>2015-01-02</th>\n",
" <td>0.483177</td>\n",
" <td>0.50325</td>\n",
" <td>0.50700</td>\n",
" <td>0.49525</td>\n",
" <td>0.50325</td>\n",
" <td>113680000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2015-01-05</th>\n",
" <td>0.475016</td>\n",
" <td>0.49475</td>\n",
" <td>0.50475</td>\n",
" <td>0.49250</td>\n",
" <td>0.50325</td>\n",
" <td>197952000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2015-01-06</th>\n",
" <td>0.460614</td>\n",
" <td>0.47975</td>\n",
" <td>0.49600</td>\n",
" <td>0.47925</td>\n",
" <td>0.49550</td>\n",
" <td>197764000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2015-01-07</th>\n",
" <td>0.459414</td>\n",
" <td>0.47850</td>\n",
" <td>0.48750</td>\n",
" <td>0.47700</td>\n",
" <td>0.48325</td>\n",
" <td>321808000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2015-01-08</th>\n",
" <td>0.476696</td>\n",
" <td>0.49650</td>\n",
" <td>0.49950</td>\n",
" <td>0.48375</td>\n",
" <td>0.48400</td>\n",
" <td>283780000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
"Price Adj Close Close High Low Open Volume\n",
"Ticker NVDA NVDA NVDA NVDA NVDA NVDA\n",
"Date \n",
"2015-01-02 0.483177 0.50325 0.50700 0.49525 0.50325 113680000\n",
"2015-01-05 0.475016 0.49475 0.50475 0.49250 0.50325 197952000\n",
"2015-01-06 0.460614 0.47975 0.49600 0.47925 0.49550 197764000\n",
"2015-01-07 0.459414 0.47850 0.48750 0.47700 0.48325 321808000\n",
"2015-01-08 0.476696 0.49650 0.49950 0.48375 0.48400 283780000"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# download the data\n",
"df_nvda = yf.download(ticker, start=start_date, end=end_date)\n",
"df_nvda.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Define each column in the dataset\n",
"- **Date**: The date of the stock market data\n",
"- **Open**: The opening price of the stock\n",
"- **High**: The highest price of the stock\n",
"- **Low**: The lowest price of the stock\n",
"- **Close**: The closing price of the stock\n",
"- **Adj Close**: The adjusted closing price of the stock\n",
"- **Volume**: The volume of the stock\n",
"- **Ticker**: The ticker symbol of the stock\n",
"- **Name**: The name of the stock\n",
"- **Sector**: The sector of the stock\n",
"- **Industry**: The industry of the stock\n",
"- **Country**: The country of the stock"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"# save the data in a csv file\n",
"df_nvda.to_csv('datasets/' + ticker + '_yfinance.csv')"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"[*********************100%***********************] 5 of 5 completed\n"
]
}
],
"source": [
"# download the data from multiple tickers\n",
"tickers = ['AAPL', 'MSFT', 'NVDA', 'GOOGL', 'AMZN']\n",
"start_date = '2010-01-01'\n",
"end_date = '2024-11-21'\n",
"df = yf.download(tickers, start=start_date, end=end_date)\n",
"df.head()\n",
"# save the data in a csv file\n",
"df.to_csv('datasets/' + '_'.join(tickers) + '_yfinance.csv')"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"DatetimeIndex: 3747 entries, 2010-01-04 to 2024-11-20\n",
"Data columns (total 30 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 (Adj Close, AAPL) 3747 non-null float64\n",
" 1 (Adj Close, AMZN) 3747 non-null float64\n",
" 2 (Adj Close, GOOGL) 3747 non-null float64\n",
" 3 (Adj Close, MSFT) 3747 non-null float64\n",
" 4 (Adj Close, NVDA) 3747 non-null float64\n",
" 5 (Close, AAPL) 3747 non-null float64\n",
" 6 (Close, AMZN) 3747 non-null float64\n",
" 7 (Close, GOOGL) 3747 non-null float64\n",
" 8 (Close, MSFT) 3747 non-null float64\n",
" 9 (Close, NVDA) 3747 non-null float64\n",
" 10 (High, AAPL) 3747 non-null float64\n",
" 11 (High, AMZN) 3747 non-null float64\n",
" 12 (High, GOOGL) 3747 non-null float64\n",
" 13 (High, MSFT) 3747 non-null float64\n",
" 14 (High, NVDA) 3747 non-null float64\n",
" 15 (Low, AAPL) 3747 non-null float64\n",
" 16 (Low, AMZN) 3747 non-null float64\n",
" 17 (Low, GOOGL) 3747 non-null float64\n",
" 18 (Low, MSFT) 3747 non-null float64\n",
" 19 (Low, NVDA) 3747 non-null float64\n",
" 20 (Open, AAPL) 3747 non-null float64\n",
" 21 (Open, AMZN) 3747 non-null float64\n",
" 22 (Open, GOOGL) 3747 non-null float64\n",
" 23 (Open, MSFT) 3747 non-null float64\n",
" 24 (Open, NVDA) 3747 non-null float64\n",
" 25 (Volume, AAPL) 3747 non-null int64 \n",
" 26 (Volume, AMZN) 3747 non-null int64 \n",
" 27 (Volume, GOOGL) 3747 non-null int64 \n",
" 28 (Volume, MSFT) 3747 non-null int64 \n",
" 29 (Volume, NVDA) 3747 non-null int64 \n",
"dtypes: float64(25), int64(5)\n",
"memory usage: 907.5 KB\n"
]
}
],
"source": [
"df.info()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "data_gathering",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.0"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Loading

0 comments on commit f889e97

Please sign in to comment.