From 8d5054594024c19cde19708d523e0f1cb10bae8a Mon Sep 17 00:00:00 2001
From: AammarTufail <m.aammar.tufail@outlook.com>
Date: Fri, 5 Aug 2022 14:52:45 +0200
Subject: [PATCH] Solution of Assignments

---
 .../Excercises/07_grouping.ipynb              |  11 +-
 .../Excercises/08_grouping.ipynb              |  11 +-
 .../Solutions/06_filtering_and_sorting.ipynb  |   9 +-
 .../Solutions/07_grouping.ipynb               | 557 +++++++++++++
 .../Solutions/08_grouping.ipynb               | 593 ++++++++++++++
 .../Solutions/09_grouping.ipynb               | 749 ++++++++++++++++++
 6 files changed, 1922 insertions(+), 8 deletions(-)
 create mode 100644 02_pandas_tips&tricks/Solutions/07_grouping.ipynb
 create mode 100644 02_pandas_tips&tricks/Solutions/08_grouping.ipynb
 create mode 100644 02_pandas_tips&tricks/Solutions/09_grouping.ipynb

diff --git a/02_pandas_tips&tricks/Excercises/07_grouping.ipynb b/02_pandas_tips&tricks/Excercises/07_grouping.ipynb
index d49a518..ad32fbc 100644
--- a/02_pandas_tips&tricks/Excercises/07_grouping.ipynb
+++ b/02_pandas_tips&tricks/Excercises/07_grouping.ipynb
@@ -123,9 +123,9 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 2",
+   "display_name": "Python 3.9.7 ('base')",
    "language": "python",
-   "name": "python2"
+   "name": "python3"
   },
   "language_info": {
    "codemirror_mode": {
@@ -137,7 +137,12 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython2",
-   "version": "2.7.16"
+   "version": "3.9.7"
+  },
+  "vscode": {
+   "interpreter": {
+    "hash": "9b9ced3d0af0cb92224640680f81aa3cf99730ecb34e6382d788e77830a0b6a6"
+   }
   }
  },
  "nbformat": 4,
diff --git a/02_pandas_tips&tricks/Excercises/08_grouping.ipynb b/02_pandas_tips&tricks/Excercises/08_grouping.ipynb
index 16fbcdd..c2d580c 100644
--- a/02_pandas_tips&tricks/Excercises/08_grouping.ipynb
+++ b/02_pandas_tips&tricks/Excercises/08_grouping.ipynb
@@ -133,9 +133,9 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 2",
+   "display_name": "Python 3.9.7 ('base')",
    "language": "python",
-   "name": "python2"
+   "name": "python3"
   },
   "language_info": {
    "codemirror_mode": {
@@ -147,7 +147,12 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython2",
-   "version": "2.7.11"
+   "version": "3.9.7"
+  },
+  "vscode": {
+   "interpreter": {
+    "hash": "9b9ced3d0af0cb92224640680f81aa3cf99730ecb34e6382d788e77830a0b6a6"
+   }
   }
  },
  "nbformat": 4,
diff --git a/02_pandas_tips&tricks/Solutions/06_filtering_and_sorting.ipynb b/02_pandas_tips&tricks/Solutions/06_filtering_and_sorting.ipynb
index abbfcb5..165f214 100644
--- a/02_pandas_tips&tricks/Solutions/06_filtering_and_sorting.ipynb
+++ b/02_pandas_tips&tricks/Solutions/06_filtering_and_sorting.ipynb
@@ -1918,7 +1918,7 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 3",
+   "display_name": "Python 3.9.7 ('base')",
    "language": "python",
    "name": "python3"
   },
@@ -1932,7 +1932,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.7.3"
+   "version": "3.9.7"
   },
   "toc": {
    "base_numbering": 1,
@@ -1946,6 +1946,11 @@
    "toc_position": {},
    "toc_section_display": true,
    "toc_window_display": false
+  },
+  "vscode": {
+   "interpreter": {
+    "hash": "9b9ced3d0af0cb92224640680f81aa3cf99730ecb34e6382d788e77830a0b6a6"
+   }
   }
  },
  "nbformat": 4,
diff --git a/02_pandas_tips&tricks/Solutions/07_grouping.ipynb b/02_pandas_tips&tricks/Solutions/07_grouping.ipynb
new file mode 100644
index 0000000..27f511a
--- /dev/null
+++ b/02_pandas_tips&tricks/Solutions/07_grouping.ipynb
@@ -0,0 +1,557 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Ex - GroupBy"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas as pd"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Step 2. Import the dataset from this [address](https://raw.githubusercontent.com/justmarkham/DAT8/master/data/drinks.csv). "
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Step 3. Assign it to a variable called drinks."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>country</th>\n",
+       "      <th>beer_servings</th>\n",
+       "      <th>spirit_servings</th>\n",
+       "      <th>wine_servings</th>\n",
+       "      <th>total_litres_of_pure_alcohol</th>\n",
+       "      <th>continent</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>Afghanistan</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>AS</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>Albania</td>\n",
+       "      <td>89</td>\n",
+       "      <td>132</td>\n",
+       "      <td>54</td>\n",
+       "      <td>4.9</td>\n",
+       "      <td>EU</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>Algeria</td>\n",
+       "      <td>25</td>\n",
+       "      <td>0</td>\n",
+       "      <td>14</td>\n",
+       "      <td>0.7</td>\n",
+       "      <td>AF</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>Andorra</td>\n",
+       "      <td>245</td>\n",
+       "      <td>138</td>\n",
+       "      <td>312</td>\n",
+       "      <td>12.4</td>\n",
+       "      <td>EU</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>Angola</td>\n",
+       "      <td>217</td>\n",
+       "      <td>57</td>\n",
+       "      <td>45</td>\n",
+       "      <td>5.9</td>\n",
+       "      <td>AF</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "       country  beer_servings  spirit_servings  wine_servings  \\\n",
+       "0  Afghanistan              0                0              0   \n",
+       "1      Albania             89              132             54   \n",
+       "2      Algeria             25                0             14   \n",
+       "3      Andorra            245              138            312   \n",
+       "4       Angola            217               57             45   \n",
+       "\n",
+       "   total_litres_of_pure_alcohol continent  \n",
+       "0                           0.0        AS  \n",
+       "1                           4.9        EU  \n",
+       "2                           0.7        AF  \n",
+       "3                          12.4        EU  \n",
+       "4                           5.9        AF  "
+      ]
+     },
+     "execution_count": 4,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "drinks = pd.read_csv(filepath_or_buffer='https://raw.githubusercontent.com/justmarkham/DAT8/master/data/drinks.csv')\n",
+    "drinks.head(n=5)  # preview the first five rows"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Step 4. Which continent drinks the most beer on average?"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "continent\n",
+       "AF     61.471698\n",
+       "AS     37.045455\n",
+       "EU    193.777778\n",
+       "OC     89.687500\n",
+       "SA    175.083333\n",
+       "Name: beer_servings, dtype: float64"
+      ]
+     },
+     "execution_count": 6,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "drinks.groupby('continent')['beer_servings'].mean()  # average beer servings per continent"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Step 5. For each continent print the statistics for wine consumption."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "continent       \n",
+       "AF         count     53.000000\n",
+       "           mean      16.264151\n",
+       "           std       38.846419\n",
+       "           min        0.000000\n",
+       "           25%        1.000000\n",
+       "           50%        2.000000\n",
+       "           75%       13.000000\n",
+       "           max      233.000000\n",
+       "AS         count     44.000000\n",
+       "           mean       9.068182\n",
+       "           std       21.667034\n",
+       "           min        0.000000\n",
+       "           25%        0.000000\n",
+       "           50%        1.000000\n",
+       "           75%        8.000000\n",
+       "           max      123.000000\n",
+       "EU         count     45.000000\n",
+       "           mean     142.222222\n",
+       "           std       97.421738\n",
+       "           min        0.000000\n",
+       "           25%       59.000000\n",
+       "           50%      128.000000\n",
+       "           75%      195.000000\n",
+       "           max      370.000000\n",
+       "OC         count     16.000000\n",
+       "           mean      35.625000\n",
+       "           std       64.555790\n",
+       "           min        0.000000\n",
+       "           25%        1.000000\n",
+       "           50%        8.500000\n",
+       "           75%       23.250000\n",
+       "           max      212.000000\n",
+       "SA         count     12.000000\n",
+       "           mean      62.416667\n",
+       "           std       88.620189\n",
+       "           min        1.000000\n",
+       "           25%        3.000000\n",
+       "           50%       12.000000\n",
+       "           75%       98.500000\n",
+       "           max      221.000000\n",
+       "dtype: float64"
+      ]
+     },
+     "execution_count": 9,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "drinks.groupby('continent')['wine_servings'].describe()  # summary statistics per continent"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Step 6. Print the mean alcohol consumption per continent for every column"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>beer_servings</th>\n",
+       "      <th>spirit_servings</th>\n",
+       "      <th>wine_servings</th>\n",
+       "      <th>total_litres_of_pure_alcohol</th>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>continent</th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>AF</th>\n",
+       "      <td>61.471698</td>\n",
+       "      <td>16.339623</td>\n",
+       "      <td>16.264151</td>\n",
+       "      <td>3.007547</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>AS</th>\n",
+       "      <td>37.045455</td>\n",
+       "      <td>60.840909</td>\n",
+       "      <td>9.068182</td>\n",
+       "      <td>2.170455</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>EU</th>\n",
+       "      <td>193.777778</td>\n",
+       "      <td>132.555556</td>\n",
+       "      <td>142.222222</td>\n",
+       "      <td>8.617778</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>OC</th>\n",
+       "      <td>89.687500</td>\n",
+       "      <td>58.437500</td>\n",
+       "      <td>35.625000</td>\n",
+       "      <td>3.381250</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>SA</th>\n",
+       "      <td>175.083333</td>\n",
+       "      <td>114.750000</td>\n",
+       "      <td>62.416667</td>\n",
+       "      <td>6.308333</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "           beer_servings  spirit_servings  wine_servings  \\\n",
+       "continent                                                  \n",
+       "AF             61.471698        16.339623      16.264151   \n",
+       "AS             37.045455        60.840909       9.068182   \n",
+       "EU            193.777778       132.555556     142.222222   \n",
+       "OC             89.687500        58.437500      35.625000   \n",
+       "SA            175.083333       114.750000      62.416667   \n",
+       "\n",
+       "           total_litres_of_pure_alcohol  \n",
+       "continent                                \n",
+       "AF                             3.007547  \n",
+       "AS                             2.170455  \n",
+       "EU                             8.617778  \n",
+       "OC                             3.381250  \n",
+       "SA                             6.308333  "
+      ]
+     },
+     "execution_count": 10,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "drinks.groupby('continent').mean(numeric_only=True)  # skip the text column 'country'"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Step 7. Print the median alcohol consumption per continent for every column"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>beer_servings</th>\n",
+       "      <th>spirit_servings</th>\n",
+       "      <th>wine_servings</th>\n",
+       "      <th>total_litres_of_pure_alcohol</th>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>continent</th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>AF</th>\n",
+       "      <td>32.0</td>\n",
+       "      <td>3.0</td>\n",
+       "      <td>2.0</td>\n",
+       "      <td>2.30</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>AS</th>\n",
+       "      <td>17.5</td>\n",
+       "      <td>16.0</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>1.20</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>EU</th>\n",
+       "      <td>219.0</td>\n",
+       "      <td>122.0</td>\n",
+       "      <td>128.0</td>\n",
+       "      <td>10.00</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>OC</th>\n",
+       "      <td>52.5</td>\n",
+       "      <td>37.0</td>\n",
+       "      <td>8.5</td>\n",
+       "      <td>1.75</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>SA</th>\n",
+       "      <td>162.5</td>\n",
+       "      <td>108.5</td>\n",
+       "      <td>12.0</td>\n",
+       "      <td>6.85</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "           beer_servings  spirit_servings  wine_servings  \\\n",
+       "continent                                                  \n",
+       "AF                  32.0              3.0            2.0   \n",
+       "AS                  17.5             16.0            1.0   \n",
+       "EU                 219.0            122.0          128.0   \n",
+       "OC                  52.5             37.0            8.5   \n",
+       "SA                 162.5            108.5           12.0   \n",
+       "\n",
+       "           total_litres_of_pure_alcohol  \n",
+       "continent                                \n",
+       "AF                                 2.30  \n",
+       "AS                                 1.20  \n",
+       "EU                                10.00  \n",
+       "OC                                 1.75  \n",
+       "SA                                 6.85  "
+      ]
+     },
+     "execution_count": 14,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "drinks.groupby('continent').median(numeric_only=True)  # skip the text column 'country'"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Step 8. Print the mean, min and max values for spirit consumption.\n",
+    "#### This time output a DataFrame"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>mean</th>\n",
+       "      <th>min</th>\n",
+       "      <th>max</th>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>continent</th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>AF</th>\n",
+       "      <td>16.339623</td>\n",
+       "      <td>0</td>\n",
+       "      <td>152</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>AS</th>\n",
+       "      <td>60.840909</td>\n",
+       "      <td>0</td>\n",
+       "      <td>326</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>EU</th>\n",
+       "      <td>132.555556</td>\n",
+       "      <td>0</td>\n",
+       "      <td>373</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>OC</th>\n",
+       "      <td>58.437500</td>\n",
+       "      <td>0</td>\n",
+       "      <td>254</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>SA</th>\n",
+       "      <td>114.750000</td>\n",
+       "      <td>25</td>\n",
+       "      <td>302</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "                 mean  min  max\n",
+       "continent                      \n",
+       "AF          16.339623    0  152\n",
+       "AS          60.840909    0  326\n",
+       "EU         132.555556    0  373\n",
+       "OC          58.437500    0  254\n",
+       "SA         114.750000   25  302"
+      ]
+     },
+     "execution_count": 15,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "drinks.groupby('continent')['spirit_servings'].agg(['mean', 'min', 'max'])  # one column per statistic"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3.9.7 ('base')",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.7"
+  },
+  "toc": {
+   "base_numbering": 1,
+   "nav_menu": {},
+   "number_sections": true,
+   "sideBar": true,
+   "skip_h1_title": false,
+   "title_cell": "Table of Contents",
+   "title_sidebar": "Contents",
+   "toc_cell": false,
+   "toc_position": {},
+   "toc_section_display": true,
+   "toc_window_display": false
+  },
+  "vscode": {
+   "interpreter": {
+    "hash": "9b9ced3d0af0cb92224640680f81aa3cf99730ecb34e6382d788e77830a0b6a6"
+   }
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 1
+}
diff --git a/02_pandas_tips&tricks/Solutions/08_grouping.ipynb b/02_pandas_tips&tricks/Solutions/08_grouping.ipynb
new file mode 100644
index 0000000..1283e66
--- /dev/null
+++ b/02_pandas_tips&tricks/Solutions/08_grouping.ipynb
@@ -0,0 +1,593 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Occupation"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 64,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas as pd"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Step 2. Import the dataset from this [address](https://raw.githubusercontent.com/justmarkham/DAT8/master/data/u.user). "
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Step 3. Assign it to a variable called users."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 65,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>age</th>\n",
+       "      <th>gender</th>\n",
+       "      <th>occupation</th>\n",
+       "      <th>zip_code</th>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>user_id</th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>24</td>\n",
+       "      <td>M</td>\n",
+       "      <td>technician</td>\n",
+       "      <td>85711</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>53</td>\n",
+       "      <td>F</td>\n",
+       "      <td>other</td>\n",
+       "      <td>94043</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>23</td>\n",
+       "      <td>M</td>\n",
+       "      <td>writer</td>\n",
+       "      <td>32067</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>24</td>\n",
+       "      <td>M</td>\n",
+       "      <td>technician</td>\n",
+       "      <td>43537</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>5</th>\n",
+       "      <td>33</td>\n",
+       "      <td>F</td>\n",
+       "      <td>other</td>\n",
+       "      <td>15213</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "         age gender  occupation zip_code\n",
+       "user_id                                 \n",
+       "1         24      M  technician    85711\n",
+       "2         53      F       other    94043\n",
+       "3         23      M      writer    32067\n",
+       "4         24      M  technician    43537\n",
+       "5         33      F       other    15213"
+      ]
+     },
+     "execution_count": 65,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "users = pd.read_table('https://raw.githubusercontent.com/justmarkham/DAT8/master/data/u.user',\n",
+    "                      index_col='user_id', sep='|')  # pipe-delimited file, indexed by user_id\n",
+    "users.head(n=5)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Step 4. Discover the mean age per occupation"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 66,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "occupation\n",
+       "administrator    38.746835\n",
+       "artist           31.392857\n",
+       "doctor           43.571429\n",
+       "educator         42.010526\n",
+       "engineer         36.388060\n",
+       "entertainment    29.222222\n",
+       "executive        38.718750\n",
+       "healthcare       41.562500\n",
+       "homemaker        32.571429\n",
+       "lawyer           36.750000\n",
+       "librarian        40.000000\n",
+       "marketing        37.615385\n",
+       "none             26.555556\n",
+       "other            34.523810\n",
+       "programmer       33.121212\n",
+       "retired          63.071429\n",
+       "salesman         35.666667\n",
+       "scientist        35.548387\n",
+       "student          22.081633\n",
+       "technician       33.148148\n",
+       "writer           36.311111\n",
+       "Name: age, dtype: float64"
+      ]
+     },
+     "execution_count": 66,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "users.groupby('occupation')['age'].mean()  # average age within each occupation"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Step 5. Compute the percentage of male users per occupation and sort it from highest to lowest"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 150,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "doctor           100.000000\n",
+       "engineer          97.014925\n",
+       "technician        96.296296\n",
+       "retired           92.857143\n",
+       "programmer        90.909091\n",
+       "executive         90.625000\n",
+       "scientist         90.322581\n",
+       "entertainment     88.888889\n",
+       "lawyer            83.333333\n",
+       "salesman          75.000000\n",
+       "educator          72.631579\n",
+       "student           69.387755\n",
+       "other             65.714286\n",
+       "marketing         61.538462\n",
+       "writer            57.777778\n",
+       "none              55.555556\n",
+       "administrator     54.430380\n",
+       "artist            53.571429\n",
+       "librarian         43.137255\n",
+       "healthcare        31.250000\n",
+       "homemaker         14.285714\n",
+       "dtype: float64"
+      ]
+     },
+     "execution_count": 150,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# map a gender code to a 0/1 male flag\n",
+    "def gender_to_numeric(x):\n",
+    "    \"\"\"Return 1 for 'M', 0 for 'F' and None for any other value.\"\"\"\n",
+    "    return {'M': 1, 'F': 0}.get(x)\n",
+    "\n",
+    "# apply the function to the gender column and create a new column\n",
+    "users['gender_n'] = users['gender'].map(gender_to_numeric)\n",
+    "\n",
+    "# the mean of a 0/1 flag is the share of 1s, so a single grouped mean gives\n",
+    "# the male percentage per occupation; no need to divide two separately\n",
+    "# aggregated Series and rely on index alignment\n",
+    "a = users.groupby('occupation')['gender_n'].mean() * 100\n",
+    "\n",
+    "# sort from the most male to the least\n",
+    "a.sort_values(ascending=False)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Step 6. For each occupation, calculate the minimum and maximum ages"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 151,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>min</th>\n",
+       "      <th>max</th>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>occupation</th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>administrator</th>\n",
+       "      <td>21</td>\n",
+       "      <td>70</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>artist</th>\n",
+       "      <td>19</td>\n",
+       "      <td>48</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>doctor</th>\n",
+       "      <td>28</td>\n",
+       "      <td>64</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>educator</th>\n",
+       "      <td>23</td>\n",
+       "      <td>63</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>engineer</th>\n",
+       "      <td>22</td>\n",
+       "      <td>70</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>entertainment</th>\n",
+       "      <td>15</td>\n",
+       "      <td>50</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>executive</th>\n",
+       "      <td>22</td>\n",
+       "      <td>69</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>healthcare</th>\n",
+       "      <td>22</td>\n",
+       "      <td>62</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>homemaker</th>\n",
+       "      <td>20</td>\n",
+       "      <td>50</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>lawyer</th>\n",
+       "      <td>21</td>\n",
+       "      <td>53</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>librarian</th>\n",
+       "      <td>23</td>\n",
+       "      <td>69</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>marketing</th>\n",
+       "      <td>24</td>\n",
+       "      <td>55</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>none</th>\n",
+       "      <td>11</td>\n",
+       "      <td>55</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>other</th>\n",
+       "      <td>13</td>\n",
+       "      <td>64</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>programmer</th>\n",
+       "      <td>20</td>\n",
+       "      <td>63</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>retired</th>\n",
+       "      <td>51</td>\n",
+       "      <td>73</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>salesman</th>\n",
+       "      <td>18</td>\n",
+       "      <td>66</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>scientist</th>\n",
+       "      <td>23</td>\n",
+       "      <td>55</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>student</th>\n",
+       "      <td>7</td>\n",
+       "      <td>42</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>technician</th>\n",
+       "      <td>21</td>\n",
+       "      <td>55</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>writer</th>\n",
+       "      <td>18</td>\n",
+       "      <td>60</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "               min  max\n",
+       "occupation             \n",
+       "administrator   21   70\n",
+       "artist          19   48\n",
+       "doctor          28   64\n",
+       "educator        23   63\n",
+       "engineer        22   70\n",
+       "entertainment   15   50\n",
+       "executive       22   69\n",
+       "healthcare      22   62\n",
+       "homemaker       20   50\n",
+       "lawyer          21   53\n",
+       "librarian       23   69\n",
+       "marketing       24   55\n",
+       "none            11   55\n",
+       "other           13   64\n",
+       "programmer      20   63\n",
+       "retired         51   73\n",
+       "salesman        18   66\n",
+       "scientist       23   55\n",
+       "student          7   42\n",
+       "technician      21   55\n",
+       "writer          18   60"
+      ]
+     },
+     "execution_count": 151,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "users.groupby('occupation')['age'].agg(['min', 'max'])  # youngest and oldest user per occupation"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Step 7. For each combination of occupation and gender, calculate the mean age"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 152,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "occupation     gender\n",
+       "administrator  F         40.638889\n",
+       "               M         37.162791\n",
+       "artist         F         30.307692\n",
+       "               M         32.333333\n",
+       "doctor         M         43.571429\n",
+       "educator       F         39.115385\n",
+       "               M         43.101449\n",
+       "engineer       F         29.500000\n",
+       "               M         36.600000\n",
+       "entertainment  F         31.000000\n",
+       "               M         29.000000\n",
+       "executive      F         44.000000\n",
+       "               M         38.172414\n",
+       "healthcare     F         39.818182\n",
+       "               M         45.400000\n",
+       "homemaker      F         34.166667\n",
+       "               M         23.000000\n",
+       "lawyer         F         39.500000\n",
+       "               M         36.200000\n",
+       "librarian      F         40.000000\n",
+       "               M         40.000000\n",
+       "marketing      F         37.200000\n",
+       "               M         37.875000\n",
+       "none           F         36.500000\n",
+       "               M         18.600000\n",
+       "other          F         35.472222\n",
+       "               M         34.028986\n",
+       "programmer     F         32.166667\n",
+       "               M         33.216667\n",
+       "retired        F         70.000000\n",
+       "               M         62.538462\n",
+       "salesman       F         27.000000\n",
+       "               M         38.555556\n",
+       "scientist      F         28.333333\n",
+       "               M         36.321429\n",
+       "student        F         20.750000\n",
+       "               M         22.669118\n",
+       "technician     F         38.000000\n",
+       "               M         32.961538\n",
+       "writer         F         37.631579\n",
+       "               M         35.346154\n",
+       "Name: age, dtype: float64"
+      ]
+     },
+     "execution_count": 152,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "users.groupby(['occupation', 'gender'])['age'].mean()  # average age per (occupation, gender) pair"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Step 8. For each occupation, present the percentage of women and men"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 154,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "occupation     gender\n",
+       "administrator  F          45.569620\n",
+       "               M          54.430380\n",
+       "artist         F          46.428571\n",
+       "               M          53.571429\n",
+       "doctor         M         100.000000\n",
+       "educator       F          27.368421\n",
+       "               M          72.631579\n",
+       "engineer       F           2.985075\n",
+       "               M          97.014925\n",
+       "entertainment  F          11.111111\n",
+       "               M          88.888889\n",
+       "executive      F           9.375000\n",
+       "               M          90.625000\n",
+       "healthcare     F          68.750000\n",
+       "               M          31.250000\n",
+       "homemaker      F          85.714286\n",
+       "               M          14.285714\n",
+       "lawyer         F          16.666667\n",
+       "               M          83.333333\n",
+       "librarian      F          56.862745\n",
+       "               M          43.137255\n",
+       "marketing      F          38.461538\n",
+       "               M          61.538462\n",
+       "none           F          44.444444\n",
+       "               M          55.555556\n",
+       "other          F          34.285714\n",
+       "               M          65.714286\n",
+       "programmer     F           9.090909\n",
+       "               M          90.909091\n",
+       "retired        F           7.142857\n",
+       "               M          92.857143\n",
+       "salesman       F          25.000000\n",
+       "               M          75.000000\n",
+       "scientist      F           9.677419\n",
+       "               M          90.322581\n",
+       "student        F          30.612245\n",
+       "               M          69.387755\n",
+       "technician     F           3.703704\n",
+       "               M          96.296296\n",
+       "writer         F          42.222222\n",
+       "               M          57.777778\n",
+       "Name: gender, dtype: float64"
+      ]
+     },
+     "execution_count": 154,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# count the users in every (occupation, gender) pair, kept as a one-column frame\n",
+    "gender_ocup = users.groupby(['occupation', 'gender']).agg({'gender': 'count'})\n",
+    "\n",
+    "# count the users per occupation; only 'gender' is counted so the division adds no all-NaN columns\n",
+    "occup_count = users.groupby('occupation').agg({'gender': 'count'})\n",
+    "\n",
+    "# divide each pair's count by its occupation total (matched on 'occupation') and multiply by 100\n",
+    "occup_gender = gender_ocup.div(occup_count, level='occupation') * 100\n",
+    "\n",
+    "# present the percentages held in the 'gender' column\n",
+    "occup_gender.loc[:, 'gender']"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3.9.7 ('base')",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.7"
+  },
+  "toc": {
+   "base_numbering": 1,
+   "nav_menu": {},
+   "number_sections": true,
+   "sideBar": true,
+   "skip_h1_title": false,
+   "title_cell": "Table of Contents",
+   "title_sidebar": "Contents",
+   "toc_cell": false,
+   "toc_position": {},
+   "toc_section_display": true,
+   "toc_window_display": false
+  },
+  "vscode": {
+   "interpreter": {
+    "hash": "9b9ced3d0af0cb92224640680f81aa3cf99730ecb34e6382d788e77830a0b6a6"
+   }
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 1
+}
diff --git a/02_pandas_tips&tricks/Solutions/09_grouping.ipynb b/02_pandas_tips&tricks/Solutions/09_grouping.ipynb
new file mode 100644
index 0000000..a825886
--- /dev/null
+++ b/02_pandas_tips&tricks/Solutions/09_grouping.ipynb
@@ -0,0 +1,749 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Regiment"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas as pd"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Step 2. Create the DataFrame with the following values:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "raw_data = {'regiment': ['Nighthawks'] * 4 + ['Dragoons'] * 4 + ['Scouts'] * 4,\n",
+    "            'company': ['1st', '1st', '2nd', '2nd'] * 3,\n",
+    "            'name': ['Miller', 'Jacobson', 'Ali', 'Milner', 'Cooze', 'Jacon', 'Ryaner', 'Sone', 'Sloan', 'Piger', 'Riani', 'Ali'],\n",
+    "            'preTestScore': [4, 24, 31, 2, 3, 4, 24, 31, 2, 3, 2, 3],\n",
+    "            'postTestScore': [25, 94, 57, 62, 70, 25, 94, 57, 62, 70, 62, 70]}"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Step 3. Assign it to a variable called regiment.\n",
+    "#### Don't forget to name each column"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>regiment</th>\n",
+       "      <th>company</th>\n",
+       "      <th>name</th>\n",
+       "      <th>preTestScore</th>\n",
+       "      <th>postTestScore</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>Nighthawks</td>\n",
+       "      <td>1st</td>\n",
+       "      <td>Miller</td>\n",
+       "      <td>4</td>\n",
+       "      <td>25</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>Nighthawks</td>\n",
+       "      <td>1st</td>\n",
+       "      <td>Jacobson</td>\n",
+       "      <td>24</td>\n",
+       "      <td>94</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>Nighthawks</td>\n",
+       "      <td>2nd</td>\n",
+       "      <td>Ali</td>\n",
+       "      <td>31</td>\n",
+       "      <td>57</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>Nighthawks</td>\n",
+       "      <td>2nd</td>\n",
+       "      <td>Milner</td>\n",
+       "      <td>2</td>\n",
+       "      <td>62</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>Dragoons</td>\n",
+       "      <td>1st</td>\n",
+       "      <td>Cooze</td>\n",
+       "      <td>3</td>\n",
+       "      <td>70</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>5</th>\n",
+       "      <td>Dragoons</td>\n",
+       "      <td>1st</td>\n",
+       "      <td>Jacon</td>\n",
+       "      <td>4</td>\n",
+       "      <td>25</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>6</th>\n",
+       "      <td>Dragoons</td>\n",
+       "      <td>2nd</td>\n",
+       "      <td>Ryaner</td>\n",
+       "      <td>24</td>\n",
+       "      <td>94</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>7</th>\n",
+       "      <td>Dragoons</td>\n",
+       "      <td>2nd</td>\n",
+       "      <td>Sone</td>\n",
+       "      <td>31</td>\n",
+       "      <td>57</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>8</th>\n",
+       "      <td>Scouts</td>\n",
+       "      <td>1st</td>\n",
+       "      <td>Sloan</td>\n",
+       "      <td>2</td>\n",
+       "      <td>62</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>9</th>\n",
+       "      <td>Scouts</td>\n",
+       "      <td>1st</td>\n",
+       "      <td>Piger</td>\n",
+       "      <td>3</td>\n",
+       "      <td>70</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>10</th>\n",
+       "      <td>Scouts</td>\n",
+       "      <td>2nd</td>\n",
+       "      <td>Riani</td>\n",
+       "      <td>2</td>\n",
+       "      <td>62</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>11</th>\n",
+       "      <td>Scouts</td>\n",
+       "      <td>2nd</td>\n",
+       "      <td>Ali</td>\n",
+       "      <td>3</td>\n",
+       "      <td>70</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "      regiment company      name  preTestScore  postTestScore\n",
+       "0   Nighthawks     1st    Miller             4             25\n",
+       "1   Nighthawks     1st  Jacobson            24             94\n",
+       "2   Nighthawks     2nd       Ali            31             57\n",
+       "3   Nighthawks     2nd    Milner             2             62\n",
+       "4     Dragoons     1st     Cooze             3             70\n",
+       "5     Dragoons     1st     Jacon             4             25\n",
+       "6     Dragoons     2nd    Ryaner            24             94\n",
+       "7     Dragoons     2nd      Sone            31             57\n",
+       "8       Scouts     1st     Sloan             2             62\n",
+       "9       Scouts     1st     Piger             3             70\n",
+       "10      Scouts     2nd     Riani             2             62\n",
+       "11      Scouts     2nd       Ali             3             70"
+      ]
+     },
+     "execution_count": 6,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "regiment = pd.DataFrame(raw_data, columns=list(raw_data))  # columns follow the dict's key order\n",
+    "regiment"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Step 4. What is the mean preTestScore of the Nighthawks regiment?"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 26,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>preTestScore</th>\n",
+       "      <th>postTestScore</th>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>regiment</th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>Dragoons</th>\n",
+       "      <td>15.50</td>\n",
+       "      <td>61.5</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>Nighthawks</th>\n",
+       "      <td>15.25</td>\n",
+       "      <td>59.5</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>Scouts</th>\n",
+       "      <td>2.50</td>\n",
+       "      <td>66.0</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "            preTestScore  postTestScore\n",
+       "regiment                               \n",
+       "Dragoons           15.50           61.5\n",
+       "Nighthawks         15.25           59.5\n",
+       "Scouts              2.50           66.0"
+      ]
+     },
+     "execution_count": 26,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "regiment[regiment['regiment'] == 'Nighthawks'].groupby('regiment').mean(numeric_only=True)  # average only the score columns; company/name are text"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Step 5. Present general statistics by company"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 29,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th>postTestScore</th>\n",
+       "      <th>preTestScore</th>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>company</th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th rowspan=\"8\" valign=\"top\">1st</th>\n",
+       "      <th>count</th>\n",
+       "      <td>6.000000</td>\n",
+       "      <td>6.000000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>mean</th>\n",
+       "      <td>57.666667</td>\n",
+       "      <td>6.666667</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>std</th>\n",
+       "      <td>27.485754</td>\n",
+       "      <td>8.524475</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>min</th>\n",
+       "      <td>25.000000</td>\n",
+       "      <td>2.000000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>25%</th>\n",
+       "      <td>34.250000</td>\n",
+       "      <td>3.000000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>50%</th>\n",
+       "      <td>66.000000</td>\n",
+       "      <td>3.500000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>75%</th>\n",
+       "      <td>70.000000</td>\n",
+       "      <td>4.000000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>max</th>\n",
+       "      <td>94.000000</td>\n",
+       "      <td>24.000000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th rowspan=\"8\" valign=\"top\">2nd</th>\n",
+       "      <th>count</th>\n",
+       "      <td>6.000000</td>\n",
+       "      <td>6.000000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>mean</th>\n",
+       "      <td>67.000000</td>\n",
+       "      <td>15.500000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>std</th>\n",
+       "      <td>14.057027</td>\n",
+       "      <td>14.652645</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>min</th>\n",
+       "      <td>57.000000</td>\n",
+       "      <td>2.000000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>25%</th>\n",
+       "      <td>58.250000</td>\n",
+       "      <td>2.250000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>50%</th>\n",
+       "      <td>62.000000</td>\n",
+       "      <td>13.500000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>75%</th>\n",
+       "      <td>68.000000</td>\n",
+       "      <td>29.250000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>max</th>\n",
+       "      <td>94.000000</td>\n",
+       "      <td>31.000000</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "               postTestScore  preTestScore\n",
+       "company                                   \n",
+       "1st     count       6.000000      6.000000\n",
+       "        mean       57.666667      6.666667\n",
+       "        std        27.485754      8.524475\n",
+       "        min        25.000000      2.000000\n",
+       "        25%        34.250000      3.000000\n",
+       "        50%        66.000000      3.500000\n",
+       "        75%        70.000000      4.000000\n",
+       "        max        94.000000     24.000000\n",
+       "2nd     count       6.000000      6.000000\n",
+       "        mean       67.000000     15.500000\n",
+       "        std        14.057027     14.652645\n",
+       "        min        57.000000      2.000000\n",
+       "        25%        58.250000      2.250000\n",
+       "        50%        62.000000     13.500000\n",
+       "        75%        68.000000     29.250000\n",
+       "        max        94.000000     31.000000"
+      ]
+     },
+     "execution_count": 29,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "regiment.groupby(by='company').describe()  # summary statistics for each company"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Step 6. What is the mean of each company's preTestScore?"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 33,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "company\n",
+       "1st     6.666667\n",
+       "2nd    15.500000\n",
+       "Name: preTestScore, dtype: float64"
+      ]
+     },
+     "execution_count": 33,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "regiment.groupby('company')['preTestScore'].mean()  # average pre-test score per company"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Step 7. Present the mean preTestScores grouped by regiment and company"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 35,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "regiment    company\n",
+       "Dragoons    1st         3.5\n",
+       "            2nd        27.5\n",
+       "Nighthawks  1st        14.0\n",
+       "            2nd        16.5\n",
+       "Scouts      1st         2.5\n",
+       "            2nd         2.5\n",
+       "Name: preTestScore, dtype: float64"
+      ]
+     },
+     "execution_count": 35,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "regiment.groupby(['regiment', 'company'])['preTestScore'].mean()  # average pre-test score per (regiment, company)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Step 8. Present the mean preTestScores grouped by regiment and company without hierarchical indexing"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 36,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th>company</th>\n",
+       "      <th>1st</th>\n",
+       "      <th>2nd</th>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>regiment</th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>Dragoons</th>\n",
+       "      <td>3.5</td>\n",
+       "      <td>27.5</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>Nighthawks</th>\n",
+       "      <td>14.0</td>\n",
+       "      <td>16.5</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>Scouts</th>\n",
+       "      <td>2.5</td>\n",
+       "      <td>2.5</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "company      1st   2nd\n",
+       "regiment              \n",
+       "Dragoons     3.5  27.5\n",
+       "Nighthawks  14.0  16.5\n",
+       "Scouts       2.5   2.5"
+      ]
+     },
+     "execution_count": 36,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "regiment.groupby(['regiment', 'company'])['preTestScore'].mean().unstack()  # move the company level into columns"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Step 9. Group the entire dataframe by regiment and company"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 37,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th>preTestScore</th>\n",
+       "      <th>postTestScore</th>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>regiment</th>\n",
+       "      <th>company</th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th rowspan=\"2\" valign=\"top\">Dragoons</th>\n",
+       "      <th>1st</th>\n",
+       "      <td>3.5</td>\n",
+       "      <td>47.5</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2nd</th>\n",
+       "      <td>27.5</td>\n",
+       "      <td>75.5</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th rowspan=\"2\" valign=\"top\">Nighthawks</th>\n",
+       "      <th>1st</th>\n",
+       "      <td>14.0</td>\n",
+       "      <td>59.5</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2nd</th>\n",
+       "      <td>16.5</td>\n",
+       "      <td>59.5</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th rowspan=\"2\" valign=\"top\">Scouts</th>\n",
+       "      <th>1st</th>\n",
+       "      <td>2.5</td>\n",
+       "      <td>66.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2nd</th>\n",
+       "      <td>2.5</td>\n",
+       "      <td>66.0</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "                    preTestScore  postTestScore\n",
+       "regiment   company                             \n",
+       "Dragoons   1st               3.5           47.5\n",
+       "           2nd              27.5           75.5\n",
+       "Nighthawks 1st              14.0           59.5\n",
+       "           2nd              16.5           59.5\n",
+       "Scouts     1st               2.5           66.0\n",
+       "           2nd               2.5           66.0"
+      ]
+     },
+     "execution_count": 37,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "regiment.groupby(['regiment', 'company']).mean(numeric_only=True)  # average only the score columns; 'name' is text"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Step 10. What is the number of observations in each regiment and company?"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 41,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "company  regiment  \n",
+       "1st      Dragoons      2\n",
+       "         Nighthawks    2\n",
+       "         Scouts        2\n",
+       "2nd      Dragoons      2\n",
+       "         Nighthawks    2\n",
+       "         Scouts        2\n",
+       "dtype: int64"
+      ]
+     },
+     "execution_count": 41,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "regiment.groupby(by=['company', 'regiment']).size()  # number of rows in every (company, regiment) group"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Step 11. Iterate over the regiment groups and print each regiment's name along with all of its data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 50,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Dragoons\n",
+      "   regiment company    name  preTestScore  postTestScore\n",
+      "4  Dragoons     1st   Cooze             3             70\n",
+      "5  Dragoons     1st   Jacon             4             25\n",
+      "6  Dragoons     2nd  Ryaner            24             94\n",
+      "7  Dragoons     2nd    Sone            31             57\n",
+      "Nighthawks\n",
+      "     regiment company      name  preTestScore  postTestScore\n",
+      "0  Nighthawks     1st    Miller             4             25\n",
+      "1  Nighthawks     1st  Jacobson            24             94\n",
+      "2  Nighthawks     2nd       Ali            31             57\n",
+      "3  Nighthawks     2nd    Milner             2             62\n",
+      "Scouts\n",
+      "   regiment company   name  preTestScore  postTestScore\n",
+      "8    Scouts     1st  Sloan             2             62\n",
+      "9    Scouts     1st  Piger             3             70\n",
+      "10   Scouts     2nd  Riani             2             62\n",
+      "11   Scouts     2nd    Ali             3             70\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Walk through the groups obtained by grouping the dataframe on regiment\n",
+    "for regiment_name, members in regiment.groupby('regiment'):\n",
+    "    # first the regiment's name ...\n",
+    "    print(regiment_name)\n",
+    "    # ... then every row that belongs to it\n",
+    "    print(members)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3.9.7 ('base')",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.7"
+  },
+  "toc": {
+   "base_numbering": 1,
+   "nav_menu": {},
+   "number_sections": true,
+   "sideBar": true,
+   "skip_h1_title": false,
+   "title_cell": "Table of Contents",
+   "title_sidebar": "Contents",
+   "toc_cell": false,
+   "toc_position": {},
+   "toc_section_display": true,
+   "toc_window_display": false
+  },
+  "vscode": {
+   "interpreter": {
+    "hash": "9b9ced3d0af0cb92224640680f81aa3cf99730ecb34e6382d788e77830a0b6a6"
+   }
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 1
+}