Use yfinance for gathering datasets

AammarTufail · Nov 22, 2024 · f889e97 · f889e97
1 parent 9d34012
commit f889e97
Show file tree

Hide file tree

Showing 3 changed files with 6,560 additions and 0 deletions.
diff --git a/13_data_gathering/04_yfinance.ipynb b/13_data_gathering/04_yfinance.ipynb
@@ -0,0 +1,318 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# **Loading stock market data using the yfinance library**"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from pathlib import Path\n",
+    "\n",
+    "import pandas as pd\n",
+    "import yfinance as yf"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Find the ticker symbols here: https://stockanalysis.com/stocks/"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# ticker symbol of the stock to download\n",
+    "ticker = 'NVDA'"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# define the time period (the end date is exclusive: the last row returned is the trading day before it)\n",
+    "start_date = '2015-01-01'\n",
+    "end_date = '2024-11-21'"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "[*********************100%***********************]  1 of 1 completed\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead tr th {\n",
+       "        text-align: left;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead tr:last-of-type th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr>\n",
+       "      <th>Price</th>\n",
+       "      <th>Adj Close</th>\n",
+       "      <th>Close</th>\n",
+       "      <th>High</th>\n",
+       "      <th>Low</th>\n",
+       "      <th>Open</th>\n",
+       "      <th>Volume</th>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>Ticker</th>\n",
+       "      <th>NVDA</th>\n",
+       "      <th>NVDA</th>\n",
+       "      <th>NVDA</th>\n",
+       "      <th>NVDA</th>\n",
+       "      <th>NVDA</th>\n",
+       "      <th>NVDA</th>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>Date</th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>2015-01-02</th>\n",
+       "      <td>0.483177</td>\n",
+       "      <td>0.50325</td>\n",
+       "      <td>0.50700</td>\n",
+       "      <td>0.49525</td>\n",
+       "      <td>0.50325</td>\n",
+       "      <td>113680000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2015-01-05</th>\n",
+       "      <td>0.475016</td>\n",
+       "      <td>0.49475</td>\n",
+       "      <td>0.50475</td>\n",
+       "      <td>0.49250</td>\n",
+       "      <td>0.50325</td>\n",
+       "      <td>197952000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2015-01-06</th>\n",
+       "      <td>0.460614</td>\n",
+       "      <td>0.47975</td>\n",
+       "      <td>0.49600</td>\n",
+       "      <td>0.47925</td>\n",
+       "      <td>0.49550</td>\n",
+       "      <td>197764000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2015-01-07</th>\n",
+       "      <td>0.459414</td>\n",
+       "      <td>0.47850</td>\n",
+       "      <td>0.48750</td>\n",
+       "      <td>0.47700</td>\n",
+       "      <td>0.48325</td>\n",
+       "      <td>321808000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2015-01-08</th>\n",
+       "      <td>0.476696</td>\n",
+       "      <td>0.49650</td>\n",
+       "      <td>0.49950</td>\n",
+       "      <td>0.48375</td>\n",
+       "      <td>0.48400</td>\n",
+       "      <td>283780000</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "Price      Adj Close    Close     High      Low     Open     Volume\n",
+       "Ticker          NVDA     NVDA     NVDA     NVDA     NVDA       NVDA\n",
+       "Date                                                               \n",
+       "2015-01-02  0.483177  0.50325  0.50700  0.49525  0.50325  113680000\n",
+       "2015-01-05  0.475016  0.49475  0.50475  0.49250  0.50325  197952000\n",
+       "2015-01-06  0.460614  0.47975  0.49600  0.47925  0.49550  197764000\n",
+       "2015-01-07  0.459414  0.47850  0.48750  0.47700  0.48325  321808000\n",
+       "2015-01-08  0.476696  0.49650  0.49950  0.48375  0.48400  283780000"
+      ]
+     },
+     "execution_count": 4,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Download the daily price/volume history for the single ticker.\n",
+    "# auto_adjust=False is passed explicitly so that the raw 'Close' and the\n",
+    "# separate 'Adj Close' column are both returned regardless of the installed\n",
+    "# yfinance version (newer releases default to auto_adjust=True, which\n",
+    "# drops 'Adj Close').\n",
+    "df_nvda = yf.download(ticker, start=start_date, end=end_date, auto_adjust=False)\n",
+    "df_nvda.head()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
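+    "# Definition of each column in the dataset\n",
+    "- **Date**: The trading date of the record (used as the index of the DataFrame)\n",
+    "- **Open**: The price of the stock when the market opened\n",
+    "- **High**: The highest price of the stock during the trading day\n",
+    "- **Low**: The lowest price of the stock during the trading day\n",
+    "- **Close**: The price of the stock when the market closed\n",
+    "- **Adj Close**: The closing price adjusted for corporate actions such as splits and dividends\n",
+    "- **Volume**: The number of shares traded during the day\n",
+    "- **Ticker**: The ticker symbol of the stock (appears as the second level of the column header, not as a separate column)\n",
+    "- **Name**: The name of the company (company metadata; not included in the `yf.download` output above)\n",
+    "- **Sector**: The sector of the company (company metadata; not included in the `yf.download` output above)\n",
+    "- **Industry**: The industry of the company (company metadata; not included in the `yf.download` output above)\n",
+    "- **Country**: The country of the company (company metadata; not included in the `yf.download` output above)"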
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Save the data in a csv file. The output folder is created first so the\n",
+    "# write does not fail when 'datasets/' is missing from the working directory.\n",
+    "# The columns are a two-level (Price, Ticker) header, so the CSV starts with\n",
+    "# more than one header row.\n",
+    "Path('datasets').mkdir(parents=True, exist_ok=True)\n",
+    "df_nvda.to_csv('datasets/' + ticker + '_yfinance.csv')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "[*********************100%***********************]  5 of 5 completed\n"
+     ]
+    }
+   ],
+   "source": [
+    "# download the data from multiple tickers\n",
+    "tickers = ['AAPL', 'MSFT', 'NVDA', 'GOOGL', 'AMZN']\n",
+    "# NOTE: this rebinds start_date/end_date defined for the single-ticker\n",
+    "# download earlier in the notebook.\n",
+    "start_date = '2010-01-01'\n",
+    "end_date = '2024-11-21'\n",
+    "# auto_adjust=False keeps both 'Close' and 'Adj Close' independent of the\n",
+    "# installed yfinance version's default.\n",
+    "df = yf.download(tickers, start=start_date, end=end_date, auto_adjust=False)\n",
+    "# save the data in a csv file (create the output folder if it is missing)\n",
+    "Path('datasets').mkdir(parents=True, exist_ok=True)\n",
+    "df.to_csv('datasets/' + '_'.join(tickers) + '_yfinance.csv')\n",
+    "# preview goes last: only the final expression of a cell is rendered,\n",
+    "# so a head() call in the middle of the cell displays nothing\n",
+    "df.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "<class 'pandas.core.frame.DataFrame'>\n",
+      "DatetimeIndex: 3747 entries, 2010-01-04 to 2024-11-20\n",
+      "Data columns (total 30 columns):\n",
+      " #   Column              Non-Null Count  Dtype  \n",
+      "---  ------              --------------  -----  \n",
+      " 0   (Adj Close, AAPL)   3747 non-null   float64\n",
+      " 1   (Adj Close, AMZN)   3747 non-null   float64\n",
+      " 2   (Adj Close, GOOGL)  3747 non-null   float64\n",
+      " 3   (Adj Close, MSFT)   3747 non-null   float64\n",
+      " 4   (Adj Close, NVDA)   3747 non-null   float64\n",
+      " 5   (Close, AAPL)       3747 non-null   float64\n",
+      " 6   (Close, AMZN)       3747 non-null   float64\n",
+      " 7   (Close, GOOGL)      3747 non-null   float64\n",
+      " 8   (Close, MSFT)       3747 non-null   float64\n",
+      " 9   (Close, NVDA)       3747 non-null   float64\n",
+      " 10  (High, AAPL)        3747 non-null   float64\n",
+      " 11  (High, AMZN)        3747 non-null   float64\n",
+      " 12  (High, GOOGL)       3747 non-null   float64\n",
+      " 13  (High, MSFT)        3747 non-null   float64\n",
+      " 14  (High, NVDA)        3747 non-null   float64\n",
+      " 15  (Low, AAPL)         3747 non-null   float64\n",
+      " 16  (Low, AMZN)         3747 non-null   float64\n",
+      " 17  (Low, GOOGL)        3747 non-null   float64\n",
+      " 18  (Low, MSFT)         3747 non-null   float64\n",
+      " 19  (Low, NVDA)         3747 non-null   float64\n",
+      " 20  (Open, AAPL)        3747 non-null   float64\n",
+      " 21  (Open, AMZN)        3747 non-null   float64\n",
+      " 22  (Open, GOOGL)       3747 non-null   float64\n",
+      " 23  (Open, MSFT)        3747 non-null   float64\n",
+      " 24  (Open, NVDA)        3747 non-null   float64\n",
+      " 25  (Volume, AAPL)      3747 non-null   int64  \n",
+      " 26  (Volume, AMZN)      3747 non-null   int64  \n",
+      " 27  (Volume, GOOGL)     3747 non-null   int64  \n",
+      " 28  (Volume, MSFT)      3747 non-null   int64  \n",
+      " 29  (Volume, NVDA)      3747 non-null   int64  \n",
+      "dtypes: float64(25), int64(5)\n",
+      "memory usage: 907.5 KB\n"
+     ]
+    }
+   ],
+   "source": [
+    "df.info()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "data_gathering",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.0"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}