From 2bee61561d80b65e2de348c99cd37e3b422bf45a Mon Sep 17 00:00:00 2001 From: AammarTufail Date: Fri, 5 Aug 2022 14:57:42 +0200 Subject: [PATCH] New Assignments Added --- .../Excercises/09_grouping.ipynb | 11 +- .../Excercises/10_apply.ipynb | 212 ++++++++++++++++++ .../Excercises/11_apply.ipynb | 179 +++++++++++++++ .../Solutions/EDA_food_data.ipynb | 6 +- 4 files changed, 402 insertions(+), 6 deletions(-) create mode 100644 02_pandas_tips&tricks/Excercises/10_apply.ipynb create mode 100644 02_pandas_tips&tricks/Excercises/11_apply.ipynb diff --git a/02_pandas_tips&tricks/Excercises/09_grouping.ipynb b/02_pandas_tips&tricks/Excercises/09_grouping.ipynb index abf1c12..028059c 100644 --- a/02_pandas_tips&tricks/Excercises/09_grouping.ipynb +++ b/02_pandas_tips&tricks/Excercises/09_grouping.ipynb @@ -197,9 +197,9 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 2", + "display_name": "Python 3.9.7 ('base')", "language": "python", - "name": "python2" + "name": "python3" }, "language_info": { "codemirror_mode": { @@ -211,7 +211,12 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython2", - "version": "2.7.11" + "version": "3.9.7" + }, + "vscode": { + "interpreter": { + "hash": "9b9ced3d0af0cb92224640680f81aa3cf99730ecb34e6382d788e77830a0b6a6" + } } }, "nbformat": 4, diff --git a/02_pandas_tips&tricks/Excercises/10_apply.ipynb b/02_pandas_tips&tricks/Excercises/10_apply.ipynb new file mode 100644 index 0000000..e04bdf8 --- /dev/null +++ b/02_pandas_tips&tricks/Excercises/10_apply.ipynb @@ -0,0 +1,212 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Student Alcohol Consumption" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Introduction:\n", + "\n", + "This time you will download a dataset from the UCI.\n", + "\n", + "### Step 1. 
Import the necessary libraries" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Step 2. Import the dataset from this [address](https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/04_Apply/Students_Alcohol_Consumption/student-mat.csv)." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Step 3. Assign it to a variable called df." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Step 4. For the purpose of this exercise slice the dataframe from 'school' until the 'guardian' column" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Step 5. Create a lambda function that will capitalize strings." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Step 6. Capitalize both Mjob and Fjob" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Step 7. Print the last elements of the data set." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Step 8. Did you notice the original dataframe is still lowercase? Why is that? Fix it and capitalize Mjob and Fjob." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Step 9. Create a function called majority that returns a boolean value to a new column called legal_drinker (Consider majority as older than 17 years old)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Step 10. Multiply every number of the dataset by 10. \n", + "##### I know this makes no sense, don't forget it is just an exercise" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "anaconda-cloud": {}, + "kernelspec": { + "display_name": "Python 3.9.7 ('base')", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 2 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython2", + "version": "3.9.7" + }, + "vscode": { + "interpreter": { + "hash": "9b9ced3d0af0cb92224640680f81aa3cf99730ecb34e6382d788e77830a0b6a6" + } + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/02_pandas_tips&tricks/Excercises/11_apply.ipynb b/02_pandas_tips&tricks/Excercises/11_apply.ipynb new file mode 100644 index 0000000..27aac66 --- /dev/null +++ b/02_pandas_tips&tricks/Excercises/11_apply.ipynb @@ -0,0 +1,179 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, 
+ "source": [ + "# United States - Crime Rates - 1960 - 2014" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Introduction:\n", + "\n", + "This time you will work with a dataset of United States crime rates from 1960 to 2014.\n", + "\n", + "Special thanks to: https://github.com/justmarkham for sharing the dataset and materials.\n", + "\n", + "### Step 1. Import the necessary libraries" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Step 2. Import the dataset from this [address](https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/04_Apply/US_Crime_Rates/US_Crime_Rates_1960_2014.csv). " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Step 3. Assign it to a variable called crime." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Step 4. What is the type of the columns?" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "##### Have you noticed that the type of Year is int64? Pandas has a dedicated type for working with time series. Let's see it now.\n", + "\n", + "### Step 5. Convert the type of the column Year to datetime64" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Step 6. 
Set the Year column as the index of the dataframe" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Step 7. Delete the Total column" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Step 8. Group the years by decade and sum the values\n", + "\n", + "#### Pay attention to the Population column: summing this column is a mistake" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false, + "scrolled": true + }, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Step 9. What is the most dangerous decade to live in the US?" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "anaconda-cloud": {}, + "kernelspec": { + "display_name": "Python [default]", + "language": "python", + "name": "python2" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 2 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython2", + "version": "2.7.12" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/02_pandas_tips&tricks/Solutions/EDA_food_data.ipynb b/02_pandas_tips&tricks/Solutions/EDA_food_data.ipynb index 5614a51..5f138a0 100644 --- a/02_pandas_tips&tricks/Solutions/EDA_food_data.ipynb +++ b/02_pandas_tips&tricks/Solutions/EDA_food_data.ipynb @@ -2712,7 +2712,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3.10.5 64-bit (windows store)", + "display_name": "Python 3.9.7 ('base')", "language": "python", "name": 
"python3" }, @@ -2726,12 +2726,12 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.5" + "version": "3.9.7" }, "orig_nbformat": 4, "vscode": { "interpreter": { - "hash": "2a50a5f25cb96edfd9254847fbf2f642dc98c240ea8775b6be580f22b3253a59" + "hash": "9b9ced3d0af0cb92224640680f81aa3cf99730ecb34e6382d788e77830a0b6a6" } } },