diff --git a/02_pandas_tips&tricks/Excercises/07_grouping.ipynb b/02_pandas_tips&tricks/Excercises/07_grouping.ipynb
index d49a518..ad32fbc 100644
--- a/02_pandas_tips&tricks/Excercises/07_grouping.ipynb
+++ b/02_pandas_tips&tricks/Excercises/07_grouping.ipynb
@@ -123,9 +123,9 @@
],
"metadata": {
"kernelspec": {
- "display_name": "Python 2",
+ "display_name": "Python 3.9.7 ('base')",
"language": "python",
- "name": "python2"
+ "name": "python3"
},
"language_info": {
"codemirror_mode": {
@@ -137,7 +137,12 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
- "version": "2.7.16"
+ "version": "3.9.7"
+ },
+ "vscode": {
+ "interpreter": {
+ "hash": "9b9ced3d0af0cb92224640680f81aa3cf99730ecb34e6382d788e77830a0b6a6"
+ }
}
},
"nbformat": 4,
diff --git a/02_pandas_tips&tricks/Excercises/08_grouping.ipynb b/02_pandas_tips&tricks/Excercises/08_grouping.ipynb
index 16fbcdd..c2d580c 100644
--- a/02_pandas_tips&tricks/Excercises/08_grouping.ipynb
+++ b/02_pandas_tips&tricks/Excercises/08_grouping.ipynb
@@ -133,9 +133,9 @@
],
"metadata": {
"kernelspec": {
- "display_name": "Python 2",
+ "display_name": "Python 3.9.7 ('base')",
"language": "python",
- "name": "python2"
+ "name": "python3"
},
"language_info": {
"codemirror_mode": {
@@ -147,7 +147,12 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
- "version": "2.7.11"
+ "version": "3.9.7"
+ },
+ "vscode": {
+ "interpreter": {
+ "hash": "9b9ced3d0af0cb92224640680f81aa3cf99730ecb34e6382d788e77830a0b6a6"
+ }
}
},
"nbformat": 4,
diff --git a/02_pandas_tips&tricks/Solutions/06_filtering_and_sorting.ipynb b/02_pandas_tips&tricks/Solutions/06_filtering_and_sorting.ipynb
index abbfcb5..165f214 100644
--- a/02_pandas_tips&tricks/Solutions/06_filtering_and_sorting.ipynb
+++ b/02_pandas_tips&tricks/Solutions/06_filtering_and_sorting.ipynb
@@ -1918,7 +1918,7 @@
],
"metadata": {
"kernelspec": {
- "display_name": "Python 3",
+ "display_name": "Python 3.9.7 ('base')",
"language": "python",
"name": "python3"
},
@@ -1932,7 +1932,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.7.3"
+ "version": "3.9.7"
},
"toc": {
"base_numbering": 1,
@@ -1946,6 +1946,11 @@
"toc_position": {},
"toc_section_display": true,
"toc_window_display": false
+ },
+ "vscode": {
+ "interpreter": {
+ "hash": "9b9ced3d0af0cb92224640680f81aa3cf99730ecb34e6382d788e77830a0b6a6"
+ }
}
},
"nbformat": 4,
diff --git a/02_pandas_tips&tricks/Solutions/07_grouping.ipynb b/02_pandas_tips&tricks/Solutions/07_grouping.ipynb
new file mode 100644
index 0000000..27f511a
--- /dev/null
+++ b/02_pandas_tips&tricks/Solutions/07_grouping.ipynb
@@ -0,0 +1,557 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Ex - GroupBy"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import pandas as pd"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Step 2. Import the dataset from this [address](https://raw.githubusercontent.com/justmarkham/DAT8/master/data/drinks.csv). "
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Step 3. Assign it to a variable called drinks."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " country | \n",
+ " beer_servings | \n",
+ " spirit_servings | \n",
+ " wine_servings | \n",
+ " total_litres_of_pure_alcohol | \n",
+ " continent | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " Afghanistan | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0.0 | \n",
+ " AS | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " Albania | \n",
+ " 89 | \n",
+ " 132 | \n",
+ " 54 | \n",
+ " 4.9 | \n",
+ " EU | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " Algeria | \n",
+ " 25 | \n",
+ " 0 | \n",
+ " 14 | \n",
+ " 0.7 | \n",
+ " AF | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " Andorra | \n",
+ " 245 | \n",
+ " 138 | \n",
+ " 312 | \n",
+ " 12.4 | \n",
+ " EU | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " Angola | \n",
+ " 217 | \n",
+ " 57 | \n",
+ " 45 | \n",
+ " 5.9 | \n",
+ " AF | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " country beer_servings spirit_servings wine_servings \\\n",
+ "0 Afghanistan 0 0 0 \n",
+ "1 Albania 89 132 54 \n",
+ "2 Algeria 25 0 14 \n",
+ "3 Andorra 245 138 312 \n",
+ "4 Angola 217 57 45 \n",
+ "\n",
+ " total_litres_of_pure_alcohol continent \n",
+ "0 0.0 AS \n",
+ "1 4.9 EU \n",
+ "2 0.7 AF \n",
+ "3 12.4 EU \n",
+ "4 5.9 AF "
+ ]
+ },
+ "execution_count": 4,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "drinks = pd.read_csv('https://raw.githubusercontent.com/justmarkham/DAT8/master/data/drinks.csv')\n",
+ "drinks.head()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Step 4. Which continent drinks the most beer on average?"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "continent\n",
+ "AF 61.471698\n",
+ "AS 37.045455\n",
+ "EU 193.777778\n",
+ "OC 89.687500\n",
+ "SA 175.083333\n",
+ "Name: beer_servings, dtype: float64"
+ ]
+ },
+ "execution_count": 6,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "drinks.groupby('continent').beer_servings.mean()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Step 5. For each continent print the statistics for wine consumption."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "continent \n",
+ "AF count 53.000000\n",
+ " mean 16.264151\n",
+ " std 38.846419\n",
+ " min 0.000000\n",
+ " 25% 1.000000\n",
+ " 50% 2.000000\n",
+ " 75% 13.000000\n",
+ " max 233.000000\n",
+ "AS count 44.000000\n",
+ " mean 9.068182\n",
+ " std 21.667034\n",
+ " min 0.000000\n",
+ " 25% 0.000000\n",
+ " 50% 1.000000\n",
+ " 75% 8.000000\n",
+ " max 123.000000\n",
+ "EU count 45.000000\n",
+ " mean 142.222222\n",
+ " std 97.421738\n",
+ " min 0.000000\n",
+ " 25% 59.000000\n",
+ " 50% 128.000000\n",
+ " 75% 195.000000\n",
+ " max 370.000000\n",
+ "OC count 16.000000\n",
+ " mean 35.625000\n",
+ " std 64.555790\n",
+ " min 0.000000\n",
+ " 25% 1.000000\n",
+ " 50% 8.500000\n",
+ " 75% 23.250000\n",
+ " max 212.000000\n",
+ "SA count 12.000000\n",
+ " mean 62.416667\n",
+ " std 88.620189\n",
+ " min 1.000000\n",
+ " 25% 3.000000\n",
+ " 50% 12.000000\n",
+ " 75% 98.500000\n",
+ " max 221.000000\n",
+ "dtype: float64"
+ ]
+ },
+ "execution_count": 9,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "drinks.groupby('continent').wine_servings.describe()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Step 6. Print the mean alcohol consumption per continent for every column"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " beer_servings | \n",
+ " spirit_servings | \n",
+ " wine_servings | \n",
+ " total_litres_of_pure_alcohol | \n",
+ "
\n",
+ " \n",
+ " continent | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " AF | \n",
+ " 61.471698 | \n",
+ " 16.339623 | \n",
+ " 16.264151 | \n",
+ " 3.007547 | \n",
+ "
\n",
+ " \n",
+ " AS | \n",
+ " 37.045455 | \n",
+ " 60.840909 | \n",
+ " 9.068182 | \n",
+ " 2.170455 | \n",
+ "
\n",
+ " \n",
+ " EU | \n",
+ " 193.777778 | \n",
+ " 132.555556 | \n",
+ " 142.222222 | \n",
+ " 8.617778 | \n",
+ "
\n",
+ " \n",
+ " OC | \n",
+ " 89.687500 | \n",
+ " 58.437500 | \n",
+ " 35.625000 | \n",
+ " 3.381250 | \n",
+ "
\n",
+ " \n",
+ " SA | \n",
+ " 175.083333 | \n",
+ " 114.750000 | \n",
+ " 62.416667 | \n",
+ " 6.308333 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " beer_servings spirit_servings wine_servings \\\n",
+ "continent \n",
+ "AF 61.471698 16.339623 16.264151 \n",
+ "AS 37.045455 60.840909 9.068182 \n",
+ "EU 193.777778 132.555556 142.222222 \n",
+ "OC 89.687500 58.437500 35.625000 \n",
+ "SA 175.083333 114.750000 62.416667 \n",
+ "\n",
+ " total_litres_of_pure_alcohol \n",
+ "continent \n",
+ "AF 3.007547 \n",
+ "AS 2.170455 \n",
+ "EU 8.617778 \n",
+ "OC 3.381250 \n",
+ "SA 6.308333 "
+ ]
+ },
+ "execution_count": 10,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "drinks.groupby('continent').mean()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Step 7. Print the median alcohol consumption per continent for every column"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " beer_servings | \n",
+ " spirit_servings | \n",
+ " wine_servings | \n",
+ " total_litres_of_pure_alcohol | \n",
+ "
\n",
+ " \n",
+ " continent | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " AF | \n",
+ " 32.0 | \n",
+ " 3.0 | \n",
+ " 2.0 | \n",
+ " 2.30 | \n",
+ "
\n",
+ " \n",
+ " AS | \n",
+ " 17.5 | \n",
+ " 16.0 | \n",
+ " 1.0 | \n",
+ " 1.20 | \n",
+ "
\n",
+ " \n",
+ " EU | \n",
+ " 219.0 | \n",
+ " 122.0 | \n",
+ " 128.0 | \n",
+ " 10.00 | \n",
+ "
\n",
+ " \n",
+ " OC | \n",
+ " 52.5 | \n",
+ " 37.0 | \n",
+ " 8.5 | \n",
+ " 1.75 | \n",
+ "
\n",
+ " \n",
+ " SA | \n",
+ " 162.5 | \n",
+ " 108.5 | \n",
+ " 12.0 | \n",
+ " 6.85 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " beer_servings spirit_servings wine_servings \\\n",
+ "continent \n",
+ "AF 32.0 3.0 2.0 \n",
+ "AS 17.5 16.0 1.0 \n",
+ "EU 219.0 122.0 128.0 \n",
+ "OC 52.5 37.0 8.5 \n",
+ "SA 162.5 108.5 12.0 \n",
+ "\n",
+ " total_litres_of_pure_alcohol \n",
+ "continent \n",
+ "AF 2.30 \n",
+ "AS 1.20 \n",
+ "EU 10.00 \n",
+ "OC 1.75 \n",
+ "SA 6.85 "
+ ]
+ },
+ "execution_count": 14,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "drinks.groupby('continent').median()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Step 8. Print the mean, min and max values for spirit consumption.\n",
+ "#### This time output a DataFrame"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " mean | \n",
+ " min | \n",
+ " max | \n",
+ "
\n",
+ " \n",
+ " continent | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " AF | \n",
+ " 16.339623 | \n",
+ " 0 | \n",
+ " 152 | \n",
+ "
\n",
+ " \n",
+ " AS | \n",
+ " 60.840909 | \n",
+ " 0 | \n",
+ " 326 | \n",
+ "
\n",
+ " \n",
+ " EU | \n",
+ " 132.555556 | \n",
+ " 0 | \n",
+ " 373 | \n",
+ "
\n",
+ " \n",
+ " OC | \n",
+ " 58.437500 | \n",
+ " 0 | \n",
+ " 254 | \n",
+ "
\n",
+ " \n",
+ " SA | \n",
+ " 114.750000 | \n",
+ " 25 | \n",
+ " 302 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " mean min max\n",
+ "continent \n",
+ "AF 16.339623 0 152\n",
+ "AS 60.840909 0 326\n",
+ "EU 132.555556 0 373\n",
+ "OC 58.437500 0 254\n",
+ "SA 114.750000 25 302"
+ ]
+ },
+ "execution_count": 15,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "drinks.groupby('continent').spirit_servings.agg(['mean', 'min', 'max'])"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3.9.7 ('base')",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.9.7"
+ },
+ "toc": {
+ "base_numbering": 1,
+ "nav_menu": {},
+ "number_sections": true,
+ "sideBar": true,
+ "skip_h1_title": false,
+ "title_cell": "Table of Contents",
+ "title_sidebar": "Contents",
+ "toc_cell": false,
+ "toc_position": {},
+ "toc_section_display": true,
+ "toc_window_display": false
+ },
+ "vscode": {
+ "interpreter": {
+ "hash": "9b9ced3d0af0cb92224640680f81aa3cf99730ecb34e6382d788e77830a0b6a6"
+ }
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 1
+}
diff --git a/02_pandas_tips&tricks/Solutions/08_grouping.ipynb b/02_pandas_tips&tricks/Solutions/08_grouping.ipynb
new file mode 100644
index 0000000..1283e66
--- /dev/null
+++ b/02_pandas_tips&tricks/Solutions/08_grouping.ipynb
@@ -0,0 +1,593 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Occupation"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 64,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import pandas as pd"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Step 2. Import the dataset from this [address](https://raw.githubusercontent.com/justmarkham/DAT8/master/data/u.user). "
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Step 3. Assign it to a variable called users."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 65,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " age | \n",
+ " gender | \n",
+ " occupation | \n",
+ " zip_code | \n",
+ "
\n",
+ " \n",
+ " user_id | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 1 | \n",
+ " 24 | \n",
+ " M | \n",
+ " technician | \n",
+ " 85711 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 53 | \n",
+ " F | \n",
+ " other | \n",
+ " 94043 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 23 | \n",
+ " M | \n",
+ " writer | \n",
+ " 32067 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 24 | \n",
+ " M | \n",
+ " technician | \n",
+ " 43537 | \n",
+ "
\n",
+ " \n",
+ " 5 | \n",
+ " 33 | \n",
+ " F | \n",
+ " other | \n",
+ " 15213 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " age gender occupation zip_code\n",
+ "user_id \n",
+ "1 24 M technician 85711\n",
+ "2 53 F other 94043\n",
+ "3 23 M writer 32067\n",
+ "4 24 M technician 43537\n",
+ "5 33 F other 15213"
+ ]
+ },
+ "execution_count": 65,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "users = pd.read_table('https://raw.githubusercontent.com/justmarkham/DAT8/master/data/u.user', \n",
+ " sep='|', index_col='user_id')\n",
+ "users.head()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Step 4. Discover the mean age per occupation"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 66,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "occupation\n",
+ "administrator 38.746835\n",
+ "artist 31.392857\n",
+ "doctor 43.571429\n",
+ "educator 42.010526\n",
+ "engineer 36.388060\n",
+ "entertainment 29.222222\n",
+ "executive 38.718750\n",
+ "healthcare 41.562500\n",
+ "homemaker 32.571429\n",
+ "lawyer 36.750000\n",
+ "librarian 40.000000\n",
+ "marketing 37.615385\n",
+ "none 26.555556\n",
+ "other 34.523810\n",
+ "programmer 33.121212\n",
+ "retired 63.071429\n",
+ "salesman 35.666667\n",
+ "scientist 35.548387\n",
+ "student 22.081633\n",
+ "technician 33.148148\n",
+ "writer 36.311111\n",
+ "Name: age, dtype: float64"
+ ]
+ },
+ "execution_count": 66,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "users.groupby('occupation').age.mean()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Step 5. Discover the male ratio per occupation and sort it from the highest to the lowest"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 150,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "doctor 100.000000\n",
+ "engineer 97.014925\n",
+ "technician 96.296296\n",
+ "retired 92.857143\n",
+ "programmer 90.909091\n",
+ "executive 90.625000\n",
+ "scientist 90.322581\n",
+ "entertainment 88.888889\n",
+ "lawyer 83.333333\n",
+ "salesman 75.000000\n",
+ "educator 72.631579\n",
+ "student 69.387755\n",
+ "other 65.714286\n",
+ "marketing 61.538462\n",
+ "writer 57.777778\n",
+ "none 55.555556\n",
+ "administrator 54.430380\n",
+ "artist 53.571429\n",
+ "librarian 43.137255\n",
+ "healthcare 31.250000\n",
+ "homemaker 14.285714\n",
+ "dtype: float64"
+ ]
+ },
+ "execution_count": 150,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# create a function\n",
+ "def gender_to_numeric(x):\n",
+ " if x == 'M':\n",
+ " return 1\n",
+ " if x == 'F':\n",
+ " return 0\n",
+ "\n",
+ "# apply the function to the gender column and create a new column\n",
+ "users['gender_n'] = users['gender'].apply(gender_to_numeric)\n",
+ "\n",
+ "\n",
+ "a = users.groupby('occupation').gender_n.sum() / users.occupation.value_counts() * 100 \n",
+ "\n",
+ "# sort from the most male occupation to the least\n",
+ "a.sort_values(ascending = False)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Step 6. For each occupation, calculate the minimum and maximum ages"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 151,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " min | \n",
+ " max | \n",
+ "
\n",
+ " \n",
+ " occupation | \n",
+ " | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " administrator | \n",
+ " 21 | \n",
+ " 70 | \n",
+ "
\n",
+ " \n",
+ " artist | \n",
+ " 19 | \n",
+ " 48 | \n",
+ "
\n",
+ " \n",
+ " doctor | \n",
+ " 28 | \n",
+ " 64 | \n",
+ "
\n",
+ " \n",
+ " educator | \n",
+ " 23 | \n",
+ " 63 | \n",
+ "
\n",
+ " \n",
+ " engineer | \n",
+ " 22 | \n",
+ " 70 | \n",
+ "
\n",
+ " \n",
+ " entertainment | \n",
+ " 15 | \n",
+ " 50 | \n",
+ "
\n",
+ " \n",
+ " executive | \n",
+ " 22 | \n",
+ " 69 | \n",
+ "
\n",
+ " \n",
+ " healthcare | \n",
+ " 22 | \n",
+ " 62 | \n",
+ "
\n",
+ " \n",
+ " homemaker | \n",
+ " 20 | \n",
+ " 50 | \n",
+ "
\n",
+ " \n",
+ " lawyer | \n",
+ " 21 | \n",
+ " 53 | \n",
+ "
\n",
+ " \n",
+ " librarian | \n",
+ " 23 | \n",
+ " 69 | \n",
+ "
\n",
+ " \n",
+ " marketing | \n",
+ " 24 | \n",
+ " 55 | \n",
+ "
\n",
+ " \n",
+ " none | \n",
+ " 11 | \n",
+ " 55 | \n",
+ "
\n",
+ " \n",
+ " other | \n",
+ " 13 | \n",
+ " 64 | \n",
+ "
\n",
+ " \n",
+ " programmer | \n",
+ " 20 | \n",
+ " 63 | \n",
+ "
\n",
+ " \n",
+ " retired | \n",
+ " 51 | \n",
+ " 73 | \n",
+ "
\n",
+ " \n",
+ " salesman | \n",
+ " 18 | \n",
+ " 66 | \n",
+ "
\n",
+ " \n",
+ " scientist | \n",
+ " 23 | \n",
+ " 55 | \n",
+ "
\n",
+ " \n",
+ " student | \n",
+ " 7 | \n",
+ " 42 | \n",
+ "
\n",
+ " \n",
+ " technician | \n",
+ " 21 | \n",
+ " 55 | \n",
+ "
\n",
+ " \n",
+ " writer | \n",
+ " 18 | \n",
+ " 60 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " min max\n",
+ "occupation \n",
+ "administrator 21 70\n",
+ "artist 19 48\n",
+ "doctor 28 64\n",
+ "educator 23 63\n",
+ "engineer 22 70\n",
+ "entertainment 15 50\n",
+ "executive 22 69\n",
+ "healthcare 22 62\n",
+ "homemaker 20 50\n",
+ "lawyer 21 53\n",
+ "librarian 23 69\n",
+ "marketing 24 55\n",
+ "none 11 55\n",
+ "other 13 64\n",
+ "programmer 20 63\n",
+ "retired 51 73\n",
+ "salesman 18 66\n",
+ "scientist 23 55\n",
+ "student 7 42\n",
+ "technician 21 55\n",
+ "writer 18 60"
+ ]
+ },
+ "execution_count": 151,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "users.groupby('occupation').age.agg(['min', 'max'])"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Step 7. For each combination of occupation and gender, calculate the mean age"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 152,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "occupation gender\n",
+ "administrator F 40.638889\n",
+ " M 37.162791\n",
+ "artist F 30.307692\n",
+ " M 32.333333\n",
+ "doctor M 43.571429\n",
+ "educator F 39.115385\n",
+ " M 43.101449\n",
+ "engineer F 29.500000\n",
+ " M 36.600000\n",
+ "entertainment F 31.000000\n",
+ " M 29.000000\n",
+ "executive F 44.000000\n",
+ " M 38.172414\n",
+ "healthcare F 39.818182\n",
+ " M 45.400000\n",
+ "homemaker F 34.166667\n",
+ " M 23.000000\n",
+ "lawyer F 39.500000\n",
+ " M 36.200000\n",
+ "librarian F 40.000000\n",
+ " M 40.000000\n",
+ "marketing F 37.200000\n",
+ " M 37.875000\n",
+ "none F 36.500000\n",
+ " M 18.600000\n",
+ "other F 35.472222\n",
+ " M 34.028986\n",
+ "programmer F 32.166667\n",
+ " M 33.216667\n",
+ "retired F 70.000000\n",
+ " M 62.538462\n",
+ "salesman F 27.000000\n",
+ " M 38.555556\n",
+ "scientist F 28.333333\n",
+ " M 36.321429\n",
+ "student F 20.750000\n",
+ " M 22.669118\n",
+ "technician F 38.000000\n",
+ " M 32.961538\n",
+ "writer F 37.631579\n",
+ " M 35.346154\n",
+ "Name: age, dtype: float64"
+ ]
+ },
+ "execution_count": 152,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "users.groupby(['occupation', 'gender']).age.mean()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Step 8. For each occupation present the percentage of women and men"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 154,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "occupation gender\n",
+ "administrator F 45.569620\n",
+ " M 54.430380\n",
+ "artist F 46.428571\n",
+ " M 53.571429\n",
+ "doctor M 100.000000\n",
+ "educator F 27.368421\n",
+ " M 72.631579\n",
+ "engineer F 2.985075\n",
+ " M 97.014925\n",
+ "entertainment F 11.111111\n",
+ " M 88.888889\n",
+ "executive F 9.375000\n",
+ " M 90.625000\n",
+ "healthcare F 68.750000\n",
+ " M 31.250000\n",
+ "homemaker F 85.714286\n",
+ " M 14.285714\n",
+ "lawyer F 16.666667\n",
+ " M 83.333333\n",
+ "librarian F 56.862745\n",
+ " M 43.137255\n",
+ "marketing F 38.461538\n",
+ " M 61.538462\n",
+ "none F 44.444444\n",
+ " M 55.555556\n",
+ "other F 34.285714\n",
+ " M 65.714286\n",
+ "programmer F 9.090909\n",
+ " M 90.909091\n",
+ "retired F 7.142857\n",
+ " M 92.857143\n",
+ "salesman F 25.000000\n",
+ " M 75.000000\n",
+ "scientist F 9.677419\n",
+ " M 90.322581\n",
+ "student F 30.612245\n",
+ " M 69.387755\n",
+ "technician F 3.703704\n",
+ " M 96.296296\n",
+ "writer F 42.222222\n",
+ " M 57.777778\n",
+ "Name: gender, dtype: float64"
+ ]
+ },
+ "execution_count": 154,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# create a data frame and apply count to gender\n",
+ "gender_ocup = users.groupby(['occupation', 'gender']).agg({'gender': 'count'})\n",
+ "\n",
+ "# create a DataFrame and apply count for each occupation\n",
+ "occup_count = users.groupby(['occupation']).agg('count')\n",
+ "\n",
+ "# divide gender_ocup by occup_count and multiply by 100\n",
+ "occup_gender = gender_ocup.div(occup_count, level = \"occupation\") * 100\n",
+ "\n",
+ "# present all rows from the 'gender' column\n",
+ "occup_gender.loc[: , 'gender']"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3.9.7 ('base')",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.9.7"
+ },
+ "toc": {
+ "base_numbering": 1,
+ "nav_menu": {},
+ "number_sections": true,
+ "sideBar": true,
+ "skip_h1_title": false,
+ "title_cell": "Table of Contents",
+ "title_sidebar": "Contents",
+ "toc_cell": false,
+ "toc_position": {},
+ "toc_section_display": true,
+ "toc_window_display": false
+ },
+ "vscode": {
+ "interpreter": {
+ "hash": "9b9ced3d0af0cb92224640680f81aa3cf99730ecb34e6382d788e77830a0b6a6"
+ }
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 1
+}
diff --git a/02_pandas_tips&tricks/Solutions/09_grouping.ipynb b/02_pandas_tips&tricks/Solutions/09_grouping.ipynb
new file mode 100644
index 0000000..a825886
--- /dev/null
+++ b/02_pandas_tips&tricks/Solutions/09_grouping.ipynb
@@ -0,0 +1,749 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Regiment"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import pandas as pd"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Step 2. Create the DataFrame with the following values:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [],
+ "source": [
+ "raw_data = {'regiment': ['Nighthawks', 'Nighthawks', 'Nighthawks', 'Nighthawks', 'Dragoons', 'Dragoons', 'Dragoons', 'Dragoons', 'Scouts', 'Scouts', 'Scouts', 'Scouts'], \n",
+ " 'company': ['1st', '1st', '2nd', '2nd', '1st', '1st', '2nd', '2nd','1st', '1st', '2nd', '2nd'], \n",
+ " 'name': ['Miller', 'Jacobson', 'Ali', 'Milner', 'Cooze', 'Jacon', 'Ryaner', 'Sone', 'Sloan', 'Piger', 'Riani', 'Ali'], \n",
+ " 'preTestScore': [4, 24, 31, 2, 3, 4, 24, 31, 2, 3, 2, 3],\n",
+ " 'postTestScore': [25, 94, 57, 62, 70, 25, 94, 57, 62, 70, 62, 70]}"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Step 3. Assign it to a variable called regiment.\n",
+ "#### Don't forget to name each column"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " regiment | \n",
+ " company | \n",
+ " name | \n",
+ " preTestScore | \n",
+ " postTestScore | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " Nighthawks | \n",
+ " 1st | \n",
+ " Miller | \n",
+ " 4 | \n",
+ " 25 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " Nighthawks | \n",
+ " 1st | \n",
+ " Jacobson | \n",
+ " 24 | \n",
+ " 94 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " Nighthawks | \n",
+ " 2nd | \n",
+ " Ali | \n",
+ " 31 | \n",
+ " 57 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " Nighthawks | \n",
+ " 2nd | \n",
+ " Milner | \n",
+ " 2 | \n",
+ " 62 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " Dragoons | \n",
+ " 1st | \n",
+ " Cooze | \n",
+ " 3 | \n",
+ " 70 | \n",
+ "
\n",
+ " \n",
+ " 5 | \n",
+ " Dragoons | \n",
+ " 1st | \n",
+ " Jacon | \n",
+ " 4 | \n",
+ " 25 | \n",
+ "
\n",
+ " \n",
+ " 6 | \n",
+ " Dragoons | \n",
+ " 2nd | \n",
+ " Ryaner | \n",
+ " 24 | \n",
+ " 94 | \n",
+ "
\n",
+ " \n",
+ " 7 | \n",
+ " Dragoons | \n",
+ " 2nd | \n",
+ " Sone | \n",
+ " 31 | \n",
+ " 57 | \n",
+ "
\n",
+ " \n",
+ " 8 | \n",
+ " Scouts | \n",
+ " 1st | \n",
+ " Sloan | \n",
+ " 2 | \n",
+ " 62 | \n",
+ "
\n",
+ " \n",
+ " 9 | \n",
+ " Scouts | \n",
+ " 1st | \n",
+ " Piger | \n",
+ " 3 | \n",
+ " 70 | \n",
+ "
\n",
+ " \n",
+ " 10 | \n",
+ " Scouts | \n",
+ " 2nd | \n",
+ " Riani | \n",
+ " 2 | \n",
+ " 62 | \n",
+ "
\n",
+ " \n",
+ " 11 | \n",
+ " Scouts | \n",
+ " 2nd | \n",
+ " Ali | \n",
+ " 3 | \n",
+ " 70 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " regiment company name preTestScore postTestScore\n",
+ "0 Nighthawks 1st Miller 4 25\n",
+ "1 Nighthawks 1st Jacobson 24 94\n",
+ "2 Nighthawks 2nd Ali 31 57\n",
+ "3 Nighthawks 2nd Milner 2 62\n",
+ "4 Dragoons 1st Cooze 3 70\n",
+ "5 Dragoons 1st Jacon 4 25\n",
+ "6 Dragoons 2nd Ryaner 24 94\n",
+ "7 Dragoons 2nd Sone 31 57\n",
+ "8 Scouts 1st Sloan 2 62\n",
+ "9 Scouts 1st Piger 3 70\n",
+ "10 Scouts 2nd Riani 2 62\n",
+ "11 Scouts 2nd Ali 3 70"
+ ]
+ },
+ "execution_count": 6,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "regiment = pd.DataFrame(raw_data, columns = raw_data.keys())\n",
+ "regiment"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Step 4. What is the mean preTestScore from the regiment Nighthawks? "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 26,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " preTestScore | \n",
+ " postTestScore | \n",
+ "
\n",
+ " \n",
+ " regiment | \n",
+ " | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " Dragoons | \n",
+ " 15.50 | \n",
+ " 61.5 | \n",
+ "
\n",
+ " \n",
+ " Nighthawks | \n",
+ " 15.25 | \n",
+ " 59.5 | \n",
+ "
\n",
+ " \n",
+ " Scouts | \n",
+ " 2.50 | \n",
+ " 66.0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " preTestScore postTestScore\n",
+ "regiment \n",
+ "Dragoons 15.50 61.5\n",
+ "Nighthawks 15.25 59.5\n",
+ "Scouts 2.50 66.0"
+ ]
+ },
+ "execution_count": 26,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "regiment[regiment['regiment'] == 'Nighthawks'].groupby('regiment').mean()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Step 5. Present general statistics by company"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 29,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " | \n",
+ " postTestScore | \n",
+ " preTestScore | \n",
+ "
\n",
+ " \n",
+ " company | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 1st | \n",
+ " count | \n",
+ " 6.000000 | \n",
+ " 6.000000 | \n",
+ "
\n",
+ " \n",
+ " mean | \n",
+ " 57.666667 | \n",
+ " 6.666667 | \n",
+ "
\n",
+ " \n",
+ " std | \n",
+ " 27.485754 | \n",
+ " 8.524475 | \n",
+ "
\n",
+ " \n",
+ " min | \n",
+ " 25.000000 | \n",
+ " 2.000000 | \n",
+ "
\n",
+ " \n",
+ " 25% | \n",
+ " 34.250000 | \n",
+ " 3.000000 | \n",
+ "
\n",
+ " \n",
+ " 50% | \n",
+ " 66.000000 | \n",
+ " 3.500000 | \n",
+ "
\n",
+ " \n",
+ " 75% | \n",
+ " 70.000000 | \n",
+ " 4.000000 | \n",
+ "
\n",
+ " \n",
+ " max | \n",
+ " 94.000000 | \n",
+ " 24.000000 | \n",
+ "
\n",
+ " \n",
+ " 2nd | \n",
+ " count | \n",
+ " 6.000000 | \n",
+ " 6.000000 | \n",
+ "
\n",
+ " \n",
+ " mean | \n",
+ " 67.000000 | \n",
+ " 15.500000 | \n",
+ "
\n",
+ " \n",
+ " std | \n",
+ " 14.057027 | \n",
+ " 14.652645 | \n",
+ "
\n",
+ " \n",
+ " min | \n",
+ " 57.000000 | \n",
+ " 2.000000 | \n",
+ "
\n",
+ " \n",
+ " 25% | \n",
+ " 58.250000 | \n",
+ " 2.250000 | \n",
+ "
\n",
+ " \n",
+ " 50% | \n",
+ " 62.000000 | \n",
+ " 13.500000 | \n",
+ "
\n",
+ " \n",
+ " 75% | \n",
+ " 68.000000 | \n",
+ " 29.250000 | \n",
+ "
\n",
+ " \n",
+ " max | \n",
+ " 94.000000 | \n",
+ " 31.000000 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " postTestScore preTestScore\n",
+ "company \n",
+ "1st count 6.000000 6.000000\n",
+ " mean 57.666667 6.666667\n",
+ " std 27.485754 8.524475\n",
+ " min 25.000000 2.000000\n",
+ " 25% 34.250000 3.000000\n",
+ " 50% 66.000000 3.500000\n",
+ " 75% 70.000000 4.000000\n",
+ " max 94.000000 24.000000\n",
+ "2nd count 6.000000 6.000000\n",
+ " mean 67.000000 15.500000\n",
+ " std 14.057027 14.652645\n",
+ " min 57.000000 2.000000\n",
+ " 25% 58.250000 2.250000\n",
+ " 50% 62.000000 13.500000\n",
+ " 75% 68.000000 29.250000\n",
+ " max 94.000000 31.000000"
+ ]
+ },
+ "execution_count": 29,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "regiment.groupby('company').describe()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Step 6. What is the mean of each company's preTestScore?"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 33,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "company\n",
+ "1st 6.666667\n",
+ "2nd 15.500000\n",
+ "Name: preTestScore, dtype: float64"
+ ]
+ },
+ "execution_count": 33,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "regiment.groupby('company').preTestScore.mean()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Step 7. Present the mean preTestScores grouped by regiment and company"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 35,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "regiment company\n",
+ "Dragoons 1st 3.5\n",
+ " 2nd 27.5\n",
+ "Nighthawks 1st 14.0\n",
+ " 2nd 16.5\n",
+ "Scouts 1st 2.5\n",
+ " 2nd 2.5\n",
+ "Name: preTestScore, dtype: float64"
+ ]
+ },
+ "execution_count": 35,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "regiment.groupby(['regiment', 'company']).preTestScore.mean()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Step 8. Present the mean preTestScores grouped by regiment and company without heirarchical indexing"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 36,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " company | \n",
+ " 1st | \n",
+ " 2nd | \n",
+ "
\n",
+ " \n",
+ " regiment | \n",
+ " | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " Dragoons | \n",
+ " 3.5 | \n",
+ " 27.5 | \n",
+ "
\n",
+ " \n",
+ " Nighthawks | \n",
+ " 14.0 | \n",
+ " 16.5 | \n",
+ "
\n",
+ " \n",
+ " Scouts | \n",
+ " 2.5 | \n",
+ " 2.5 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ "company 1st 2nd\n",
+ "regiment \n",
+ "Dragoons 3.5 27.5\n",
+ "Nighthawks 14.0 16.5\n",
+ "Scouts 2.5 2.5"
+ ]
+ },
+ "execution_count": 36,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "regiment.groupby(['regiment', 'company']).preTestScore.mean().unstack()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Step 9. Group the entire dataframe by regiment and company"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 37,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " | \n",
+ " preTestScore | \n",
+ " postTestScore | \n",
+ "
\n",
+ " \n",
+ " regiment | \n",
+ " company | \n",
+ " | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " Dragoons | \n",
+ " 1st | \n",
+ " 3.5 | \n",
+ " 47.5 | \n",
+ "
\n",
+ " \n",
+ " 2nd | \n",
+ " 27.5 | \n",
+ " 75.5 | \n",
+ "
\n",
+ " \n",
+ " Nighthawks | \n",
+ " 1st | \n",
+ " 14.0 | \n",
+ " 59.5 | \n",
+ "
\n",
+ " \n",
+ " 2nd | \n",
+ " 16.5 | \n",
+ " 59.5 | \n",
+ "
\n",
+ " \n",
+ " Scouts | \n",
+ " 1st | \n",
+ " 2.5 | \n",
+ " 66.0 | \n",
+ "
\n",
+ " \n",
+ " 2nd | \n",
+ " 2.5 | \n",
+ " 66.0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " preTestScore postTestScore\n",
+ "regiment company \n",
+ "Dragoons 1st 3.5 47.5\n",
+ " 2nd 27.5 75.5\n",
+ "Nighthawks 1st 14.0 59.5\n",
+ " 2nd 16.5 59.5\n",
+ "Scouts 1st 2.5 66.0\n",
+ " 2nd 2.5 66.0"
+ ]
+ },
+ "execution_count": 37,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "regiment.groupby(['regiment', 'company']).mean()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Step 10. What is the number of observations in each regiment and company"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 41,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "company regiment \n",
+ "1st Dragoons 2\n",
+ " Nighthawks 2\n",
+ " Scouts 2\n",
+ "2nd Dragoons 2\n",
+ " Nighthawks 2\n",
+ " Scouts 2\n",
+ "dtype: int64"
+ ]
+ },
+ "execution_count": 41,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "regiment.groupby(['company', 'regiment']).size()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Step 11. Iterate over a group and print the name and the whole data from the regiment"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 50,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Dragoons\n",
+ " regiment company name preTestScore postTestScore\n",
+ "4 Dragoons 1st Cooze 3 70\n",
+ "5 Dragoons 1st Jacon 4 25\n",
+ "6 Dragoons 2nd Ryaner 24 94\n",
+ "7 Dragoons 2nd Sone 31 57\n",
+ "Nighthawks\n",
+ " regiment company name preTestScore postTestScore\n",
+ "0 Nighthawks 1st Miller 4 25\n",
+ "1 Nighthawks 1st Jacobson 24 94\n",
+ "2 Nighthawks 2nd Ali 31 57\n",
+ "3 Nighthawks 2nd Milner 2 62\n",
+ "Scouts\n",
+ " regiment company name preTestScore postTestScore\n",
+ "8 Scouts 1st Sloan 2 62\n",
+ "9 Scouts 1st Piger 3 70\n",
+ "10 Scouts 2nd Riani 2 62\n",
+ "11 Scouts 2nd Ali 3 70\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Group the dataframe by regiment, and for each regiment,\n",
+ "for name, group in regiment.groupby('regiment'):\n",
+ " # print the name of the regiment\n",
+ " print(name)\n",
+ " # print the data of that regiment\n",
+ " print(group)"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3.9.7 ('base')",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.9.7"
+ },
+ "toc": {
+ "base_numbering": 1,
+ "nav_menu": {},
+ "number_sections": true,
+ "sideBar": true,
+ "skip_h1_title": false,
+ "title_cell": "Table of Contents",
+ "title_sidebar": "Contents",
+ "toc_cell": false,
+ "toc_position": {},
+ "toc_section_display": true,
+ "toc_window_display": false
+ },
+ "vscode": {
+ "interpreter": {
+ "hash": "9b9ced3d0af0cb92224640680f81aa3cf99730ecb34e6382d788e77830a0b6a6"
+ }
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 1
+}