diff --git a/numpy/02.10NumPy For Data Analysis.ipynb b/numpy/02.10NumPy For Data Analysis.ipynb new file mode 100644 index 00000000..3361ecc1 --- /dev/null +++ b/numpy/02.10NumPy For Data Analysis.ipynb @@ -0,0 +1,2254 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## What is Numpy" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "NumPy is a python library used for working with arrays.\n", + "\n", + "It also has functions for working in domain of linear algebra, fourier transform, and matrices.\n", + "\n", + "NumPy was created in 2005 by Travis Oliphant. It is an open source project and you can use it freely.\n", + "\n", + "NumPy stands for Numerical Python.\n", + "\n", + "Numpy is also incredibly fast, as it has bindings to C libraries. For more info on why you would want to use Arrays instead of lists, check out this great" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Installation Instructions\n", + "\n", + "**It is highly recommended you install Python using the Anaconda distribution to make sure all underlying dependencies (such as Linear Algebra libraries) all sync up with the use of a conda install. 
If you have Anaconda, install NumPy by going to your terminal or command prompt and typing:**\n", + " \n", + " conda install numpy\n", + " \n", + "**If you do not have Anaconda and can not install it, please refer to [Numpy's official documentation on various installation instructions.](http://docs.scipy.org/doc/numpy-1.10.1/user/install.html)**" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[9, 8, 7]" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "my_list= [9,8,7]\n", + "my_list" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([9, 8, 7])" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import numpy as np\n", + "np.array(my_list)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([1, 2, 3, 4, 5])" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "list1=[1,2,3,4,5]\n", + "np.array(list1)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[[1, 2, 3], [4, 5, 6], [6, 7, 7]]\n", + "\n", + "\n", + "[[1 2 3]\n", + " [4 5 6]\n", + " [6 7 7]]\n" + ] + } + ], + "source": [ + "matrix=[[1,2,3],[4,5,6],[6,7,7]]\n", + "print(matrix)\n", + "print ('\\n')\n", + "\n", + "print (np.array(matrix))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Built-in Methods\n", + "\n", + "There are lots of built-in ways to generate Arrays" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### NumPy is used to work with arrays. The array object in NumPy is called ndarray." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[1 2 3 4 5]\n", + "\n" + ] + } + ], + "source": [ + "arr = np.array([1, 2, 3, 4, 5])\n", + "\n", + "print(arr)\n", + "\n", + "print(type(arr))" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,\n", + " 17, 18, 19])" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "np.arange(0,20)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([ 0, 3, 6, 9, 12, 15, 18, 21, 24, 27, 30, 33, 36, 39, 42, 45, 48])" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "np.arange(0,50,3)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### linspace\n", + "Return evenly spaced numbers over a specified interval." + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([ 0., 10., 20., 30., 40., 50.])" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "np.linspace (0,50,6)" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([ 0. 
, 0.25641026, 0.51282051, 0.76923077, 1.02564103,\n", + " 1.28205128, 1.53846154, 1.79487179, 2.05128205, 2.30769231,\n", + " 2.56410256, 2.82051282, 3.07692308, 3.33333333, 3.58974359,\n", + " 3.84615385, 4.1025641 , 4.35897436, 4.61538462, 4.87179487,\n", + " 5.12820513, 5.38461538, 5.64102564, 5.8974359 , 6.15384615,\n", + " 6.41025641, 6.66666667, 6.92307692, 7.17948718, 7.43589744,\n", + " 7.69230769, 7.94871795, 8.20512821, 8.46153846, 8.71794872,\n", + " 8.97435897, 9.23076923, 9.48717949, 9.74358974, 10. ])" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "np.linspace (0,10,40)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### zeros and ones\n", + "\n", + "Generate arrays of zeros or ones" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([0., 0., 0., 0., 0., 0., 0., 0., 0.])" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "np.zeros(9)" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[0., 0., 0., 0.],\n", + " [0., 0., 0., 0.],\n", + " [0., 0., 0., 0.]])" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "np.zeros((3,4))" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([1., 1., 1., 1.])" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "np.ones(4)" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[1., 1., 1., 1.],\n", + " [1., 1., 1., 1.],\n", + " [1., 1., 1., 1.]])" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": 
"execute_result" + } + ], + "source": [ + "np.ones((3,4))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## eye\n", + "\n", + "Creates an identity matrix" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[1., 0., 0., 0., 0.],\n", + " [0., 1., 0., 0., 0.],\n", + " [0., 0., 1., 0., 0.],\n", + " [0., 0., 0., 1., 0.],\n", + " [0., 0., 0., 0., 1.]])" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "np.eye(5)" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[1., 0., 0., 0., 0.],\n", + " [0., 1., 0., 0., 0.],\n", + " [0., 0., 1., 0., 0.],\n", + " [0., 0., 0., 1., 0.]])" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "np.eye(4,5)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Random\n", + "### What is a Random Number?\n", + "Random number does NOT mean a different number every time. Random means something that can not be predicted logically." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### rand\n", + "Create an array of the given shape and populate it with\n", + "random samples from a uniform distribution\n", + "over ``[0, 1)``." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([0.7489597 , 0.78917258, 0.70866872])" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "np.random.rand(3)" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[0.54892249, 0.34257905, 0.0378045 , 0.94850873],\n", + " [0.64952372, 0.60916155, 0.02955791, 0.22780147],\n", + " [0.23216998, 0.41829629, 0.53139821, 0.33340072],\n", + " [0.51921418, 0.72563794, 0.38172859, 0.32186366]])" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "np.random.rand(4,4)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### randn\n", + "\n", + "Return a sample (or samples) from the \"standard normal\" distribution. Unlike rand which is uniform:" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([ 0.38705145, -0.00165486])" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "np.random.randn(2)" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[ 0.28120086, 0.68961856, -1.08746915],\n", + " [-0.8188998 , -0.94987156, 0.39145483]])" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "np.random.randn(2,3)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### randint\n", + "Return random integers from `low` (inclusive) to `high` (exclusive)." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "1" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "np.random.randint(1,7)" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([53, 1, 5, 95, 28, 22, 76, 12, 45, 61])" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "np.random.randint(1,100,10)" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "15" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "np.random.randint (1,111)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Some of the Array Attributes and methods\n", + "\n", + "Have a look at some of the main attributes and methods of an array" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [], + "source": [ + "arr = np.arange(50)\n", + "ranarr=np.random.randint (0,100,10)" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,\n", + " 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,\n", + " 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49])" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "arr" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([60, 44, 98, 38, 27, 97, 75, 53, 10, 40])" + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + 
"ranarr" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Reshape \n", + "Covert existing array into new shape with same data." + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9],\n", + " [10, 11, 12, 13, 14, 15, 16, 17, 18, 19],\n", + " [20, 21, 22, 23, 24, 25, 26, 27, 28, 29],\n", + " [30, 31, 32, 33, 34, 35, 36, 37, 38, 39],\n", + " [40, 41, 42, 43, 44, 45, 46, 47, 48, 49]])" + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "arr.reshape(5,10)" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,\n", + " 16, 17, 18, 19, 20, 21, 22, 23, 24],\n", + " [25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,\n", + " 41, 42, 43, 44, 45, 46, 47, 48, 49]])" + ] + }, + "execution_count": 31, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "arr.reshape (2,25)" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": {}, + "outputs": [], + "source": [ + "arr1= np.arange(24)" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,\n", + " 17, 18, 19, 20, 21, 22, 23])" + ] + }, + "execution_count": 35, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "arr1" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[[ 0, 1, 2, 3],\n", + " [ 4, 5, 6, 7]],\n", + "\n", + " [[ 8, 9, 10, 11],\n", + " [12, 13, 14, 15]],\n", + "\n", + " [[16, 17, 18, 19],\n", + " [20, 21, 22, 23]]])" + ] + }, + "execution_count": 37, + "metadata": {}, + 
"output_type": "execute_result" + } + ], + "source": [ + "arr1.reshape(3,2,4)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### max,min,argmax,argmin\n", + "\n", + "These are useful methods for finding max or min values. Or to find their index locations using argmin or argmax" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,\n", + " 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,\n", + " 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49])" + ] + }, + "execution_count": 39, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "arr" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "arr.min()" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "49" + ] + }, + "execution_count": 41, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "arr.max()" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([60, 44, 98, 38, 27, 97, 75, 53, 10, 40])" + ] + }, + "execution_count": 42, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ranarr" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "98" + ] + }, + "execution_count": 43, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ranarr.max()" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "2" + ] + }, + "execution_count": 44, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ranarr.argmax()" + ] + }, + { + "cell_type": "code", + "execution_count": 
45, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "8" + ] + }, + "execution_count": 45, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ranarr.argmin()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Shape\n", + "The shape property is usually used to get the current shape of an array, but may also be used to reshape the array in-place by assigning a tuple of array dimensions to it" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(50,)" + ] + }, + "execution_count": 46, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Shape is an attribute that array have \n", + "# Vector\n", + "arr.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 48, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[ 0, 1, 2, 3, 4],\n", + " [ 5, 6, 7, 8, 9],\n", + " [10, 11, 12, 13, 14],\n", + " [15, 16, 17, 18, 19],\n", + " [20, 21, 22, 23, 24],\n", + " [25, 26, 27, 28, 29],\n", + " [30, 31, 32, 33, 34],\n", + " [35, 36, 37, 38, 39],\n", + " [40, 41, 42, 43, 44],\n", + " [45, 46, 47, 48, 49]])" + ] + }, + "execution_count": 48, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Notice the the two set of array\n", + "arr.reshape(10,5)" + ] + }, + { + "cell_type": "code", + "execution_count": 49, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,\n", + " 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,\n", + " 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,\n", + " 48, 49]])" + ] + }, + "execution_count": 49, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "arr.reshape (1,50)" + ] + }, + { + "cell_type": "code", + "execution_count": 51, + "metadata": {}, + "outputs": [ + { + "data": { + 
"text/plain": [ + "(50, 1)" + ] + }, + "execution_count": 51, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "arr.reshape(50,1).shape" + ] + }, + { + "cell_type": "code", + "execution_count": 52, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "dtype('int32')" + ] + }, + "execution_count": 52, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# You Can find the datatype of object in the array \n", + "arr.dtype" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "A data type object (an instance of numpy.dtype class) describes how the bytes in the fixed-size block of memory corresponding to an array item should be interpreted. It describes the following aspects of the data:\n", + "\n", + "Type of the data (integer, float, Python object, etc.)\n", + "Size of the data (how many bytes is in e.g. the integer)\n", + "Byte order of the data (little-endian or big-endian)\n", + "If the data type is structured, an aggregate of other data types, (e.g., describing an array item consisting of an integer and a float),\n", + "what are the names of the “fields” of the structure, by which they can be accessed,\n", + "what is the data-type of each field, and\n", + "which part of the memory block each field takes.\n", + "If the data type is a sub-array, what is its shape and data type.\n", + "\n", + "https://docs.scipy.org/doc/numpy-1.13.0/reference/arrays.dtypes.html" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Numpy Indexing and Selection\n", + "We will explore how we are going to grab a prticular elements of an array " + ] + }, + { + "cell_type": "code", + "execution_count": 54, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,\n", + " 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,\n", + " 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 
47, 48, 49])" + ] + }, + "execution_count": 54, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "arr" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Bracket Indexing and Selection\n", + "The simplest way to pick one or some elements of an array looks very similar to python lists:" + ] + }, + { + "cell_type": "code", + "execution_count": 55, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "24" + ] + }, + "execution_count": 55, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Get the value at the index 24\n", + "arr[24]" + ] + }, + { + "cell_type": "code", + "execution_count": 58, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[ 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23]\n", + "\n", + "\n", + "[0 1 2 3 4 5 6 7 8 9]\n" + ] + } + ], + "source": [ + "# Get the value in the range \n", + "print (arr[4:24])\n", + "print ('\\n')\n", + "print (arr[0:10])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Broadcasting\n", + "\n", + "Here NumPy differs from a normal Python list: it has the ability to broadcast" + ] + }, + { + "cell_type": "code", + "execution_count": 61, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([88, 88, 88, 88, 88, 88, 88, 88, 88, 88, 10, 11, 12, 13, 14, 15, 16,\n", + " 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,\n", + " 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49])" + ] + }, + "execution_count": 61, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Setting the value with index range (Broadcasting)\n", + "arr[0:10]=88\n", + "# Show \n", + "arr" + ] + }, + { + "cell_type": "code", + "execution_count": 62, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,\n", 
+ " 17, 18, 19, 20])" + ] + }, + "execution_count": 62, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "arr=np.arange (0,21)\n", + "arr" + ] + }, + { + "cell_type": "code", + "execution_count": 63, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15])" + ] + }, + "execution_count": 63, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Slicing is Very important \n", + "slice_of_arr=arr[0:16]\n", + "#Show slice \n", + "slice_of_arr" + ] + }, + { + "cell_type": "code", + "execution_count": 66, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50])" + ] + }, + "execution_count": 66, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Change the value of Slice \n", + "slice_of_arr[:]=50\n", + "slice_of_arr" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Note : Now the change is also there in original array " + ] + }, + { + "cell_type": "code", + "execution_count": 67, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 16,\n", + " 17, 18, 19, 20])" + ] + }, + "execution_count": 67, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "arr" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Data is not copied, it's a view of the original array! This avoids memory problems!" 
+ ] + }, + { + "cell_type": "code", + "execution_count": 69, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 16,\n", + " 17, 18, 19, 20])" + ] + }, + "execution_count": 69, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# To get a copy , need to be explicit \n", + "arr_copy=arr.copy ()\n", + "arr_copy " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Indexing & Selection for 2D Array(Matrix)\n", + "Two-dimensional (2D) arrays are indexed by two subscripts, one for the row and one for the column. Each element in the 2D array must be of the same type, either a primitive type or object type.\n", + "\n", + "\n", + "The general format is arr_2d[row][col] or arr_2d[row,col]. I recommend usually using the comma notation for clarity." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[1, 2, 3],\n", + " [4, 5, 6],\n", + " [7, 8, 9]])" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import numpy as np\n", + "matrix= np.array(([1,2,3],[4,5,6],[7,8,9]))\n", + "# Show the value \n", + "matrix" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([7, 8, 9])" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Indexing Rows \n", + "# Grab 3nd row\n", + "matrix[2]" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[2],\n", + " [5],\n", + " [8]])" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#row[1][2] #row[1,2]\n", + "# Indexing Column \n", + "# Grab 2nd column \n", + "matrix[0:,1:2]" + ] + }, 
+ { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "4" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Format is arr_2d[row][col] or arr_2d[row,col]\n", + "\n", + "# Getting individual element value\n", + "matrix[1][0]" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "6" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "matrix[1][2]" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "4" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Getting individual element value\n", + "matrix[1,0]" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[1, 2, 3],\n", + " [4, 5, 6],\n", + " [7, 8, 9]])" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "matrix" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[1, 2],\n", + " [4, 5]])" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Matrix slicing \n", + "# Shape (2,2) from top left corner \n", + "matrix[0:2,0:2]" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[2, 3],\n", + " [5, 6]])" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Matrix slicing \n", + "# Shape (2,2) from top right corner \n", + "matrix[0:2,1:3]" + ] + }, + { + "cell_type": "code", + "execution_count": 
12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[5, 6],\n", + " [8, 9]])" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Matrix slicing \n", + "# Shape (2,2) from bottom right corner \n", + "matrix[1:,1:]" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "arr=np.arange(25)" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "b=arr.reshape(5,5)" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[ 0, 1, 2, 3, 4],\n", + " [ 5, 6, 7, 8, 9],\n", + " [10, 11, 12, 13, 14],\n", + " [15, 16, 17, 18, 19],\n", + " [20, 21, 22, 23, 24]])" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "b" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[18, 19],\n", + " [23, 24]])" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "b[3:,3:]" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[ 3],\n", + " [ 8],\n", + " [13],\n", + " [18],\n", + " [23]])" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Grab all the elements in 4th column\n", + "b[0:,3:4]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Fancy Indexing \n", + "Fancy indexing is conceptually simple: it means passing an array of indices to access multiple array elements at once.\n", + "\n", + "Let's see an example " + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [], + "source": [ + "# Set 
up matrix \n", + "matrix1 = np.zeros ((11,11))\n", + "# Length of matrix\n", + "matrix1_length = matrix1.shape[1]" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[ 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],\n", + " [ 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2.],\n", + " [ 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4.],\n", + " [ 6., 6., 6., 6., 6., 6., 6., 6., 6., 6., 6.],\n", + " [ 8., 8., 8., 8., 8., 8., 8., 8., 8., 8., 8.],\n", + " [10., 10., 10., 10., 10., 10., 10., 10., 10., 10., 10.],\n", + " [12., 12., 12., 12., 12., 12., 12., 12., 12., 12., 12.],\n", + " [14., 14., 14., 14., 14., 14., 14., 14., 14., 14., 14.],\n", + " [16., 16., 16., 16., 16., 16., 16., 16., 16., 16., 16.],\n", + " [18., 18., 18., 18., 18., 18., 18., 18., 18., 18., 18.],\n", + " [20., 20., 20., 20., 20., 20., 20., 20., 20., 20., 20.]])" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "for i in range (matrix1_length):\n", + " matrix1[i]+=i\n", + "matrix1" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[ 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4.],\n", + " [ 6., 6., 6., 6., 6., 6., 6., 6., 6., 6., 6.],\n", + " [ 8., 8., 8., 8., 8., 8., 8., 8., 8., 8., 8.],\n", + " [10., 10., 10., 10., 10., 10., 10., 10., 10., 10., 10.]])" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Fancy Indexing allows The following \n", + "matrix1[[2,3,4,5]]" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[10., 10., 10., 10., 10., 10., 10., 10., 10., 10., 10.],\n", + " [12., 12., 12., 12., 12., 12., 12., 12., 12., 12., 12.],\n", + " [14., 14., 14., 14., 14., 14., 14., 14., 14., 14., 14.],\n", + " [16., 16., 16., 16., 16., 16., 
16., 16., 16., 16., 16.]])" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "matrix1[[5,6,7,8]]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## More Indexing Help\n", + "Indexing a 2d matrix can be a bit confusing at first, especially when you start to add in step size. Try a Google image search for NumPy indexing to find useful images, like this one:" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "\n", + "## Selection\n", + "\n", + "Let's briefly go over how to use brackets for selection based off of comparison operators." + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17,\n", + " 18, 19, 20, 21])" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "arr = np.arange(1,22)\n", + "arr" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([False, False, False, False, True, True, True, True, True,\n", + " True, True, True, True, True, True, True, True, True,\n", + " True, True, True])" + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "arr>4" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [], + "source": [ + "bool_arr= arr>5" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([False, False, False, False, False, True, True, True, True,\n", + " True, True, True, True, True, True, True, True, True,\n", + " True, True, True])" + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "bool_arr" + ] + }, + { + "cell_type": 
"code", + "execution_count": 28, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([ 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21])" + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "arr[bool_arr]" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([ 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20,\n", + " 21])" + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "arr[arr>3]" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([ 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21])" + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "x=6\n", + "arr[arr>x]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## NumPy Operations\n", + "\n", + "### Arithmetic \n", + "Arithmetic Operations. 
Input arrays for performing arithmetic operations such as add(), subtract(), multiply(), and divide() must be either of the same shape or should conform to array broadcasting rules.\n", + "\n", + "You can easily perform array-with-array arithmetic or scalar-with-array arithmetic.\n", + "\n", + "Let's see some examples.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": {}, + "outputs": [], + "source": [ + "arr= np.arange (0,25)" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,\n", + " 17, 18, 19, 20, 21, 22, 23, 24])" + ] + }, + "execution_count": 32, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "arr" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([ 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32,\n", + " 34, 36, 38, 40, 42, 44, 46, 48])" + ] + }, + "execution_count": 34, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "arr+arr" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([ 0, 1, 4, 9, 16, 25, 36, 49, 64, 81, 100, 121, 144,\n", + " 169, 196, 225, 256, 289, 324, 361, 400, 441, 484, 529, 576])" + ] + }, + "execution_count": 35, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "arr*arr" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0])" + ] + }, + "execution_count": 36, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "arr-arr" + ] + }, + { + "cell_type": "code", + "execution_count": 147, + "metadata": {}, +
"outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\pskj0\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:3: RuntimeWarning: invalid value encountered in true_divide\n", + " This is separate from the ipykernel package so we can avoid doing imports until\n" + ] + }, + { + "data": { + "text/plain": [ + "array([nan, 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", + " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.])" + ] + }, + "execution_count": 147, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Warning on division by zero, but not an error!\n", + "# Just replaced with nan\n", + "arr/arr" + ] + }, + { + "cell_type": "code", + "execution_count": 148, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\pskj0\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:2: RuntimeWarning: divide by zero encountered in true_divide\n", + " \n" + ] + }, + { + "data": { + "text/plain": [ + "array([ inf, 1. 
, 0.5 , 0.33333333, 0.25 ,\n", + " 0.2 , 0.16666667, 0.14285714, 0.125 , 0.11111111,\n", + " 0.1 , 0.09090909, 0.08333333, 0.07692308, 0.07142857,\n", + " 0.06666667, 0.0625 , 0.05882353, 0.05555556, 0.05263158,\n", + " 0.05 , 0.04761905, 0.04545455, 0.04347826, 0.04166667])" + ] + }, + "execution_count": 148, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Also a warning, not an error; the result is infinity instead\n", + "1/arr" + ] + }, + { + "cell_type": "code", + "execution_count": 149, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([ 0, 1, 8, 27, 64, 125, 216, 343, 512,\n", + " 729, 1000, 1331, 1728, 2197, 2744, 3375, 4096, 4913,\n", + " 5832, 6859, 8000, 9261, 10648, 12167, 13824], dtype=int32)" + ] + }, + "execution_count": 149, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "arr**3" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Universal Array Functions\n", + "\n", + "NumPy comes with many [universal array functions](http://docs.scipy.org/doc/numpy/reference/ufuncs.html), which are essentially just mathematical operations you can use to perform the operation across the array. Let's show some common ones:" + ] + }, + { + "cell_type": "code", + "execution_count": 150, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([0. , 1. , 1.41421356, 1.73205081, 2. ,\n", + " 2.23606798, 2.44948974, 2.64575131, 2.82842712, 3. ,\n", + " 3.16227766, 3.31662479, 3.46410162, 3.60555128, 3.74165739,\n", + " 3.87298335, 4.
, 4.12310563, 4.24264069, 4.35889894,\n", + " 4.47213595, 4.58257569, 4.69041576, 4.79583152, 4.89897949])" + ] + }, + "execution_count": 150, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Taking square Roots \n", + "np.sqrt (arr)" + ] + }, + { + "cell_type": "code", + "execution_count": 151, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "24" + ] + }, + "execution_count": 151, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "np.max(arr) " + ] + }, + { + "cell_type": "code", + "execution_count": 152, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([1.00000000e+00, 2.71828183e+00, 7.38905610e+00, 2.00855369e+01,\n", + " 5.45981500e+01, 1.48413159e+02, 4.03428793e+02, 1.09663316e+03,\n", + " 2.98095799e+03, 8.10308393e+03, 2.20264658e+04, 5.98741417e+04,\n", + " 1.62754791e+05, 4.42413392e+05, 1.20260428e+06, 3.26901737e+06,\n", + " 8.88611052e+06, 2.41549528e+07, 6.56599691e+07, 1.78482301e+08,\n", + " 4.85165195e+08, 1.31881573e+09, 3.58491285e+09, 9.74480345e+09,\n", + " 2.64891221e+10])" + ] + }, + "execution_count": 152, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Calculating exponential (e^)\n", + "np.exp(arr)" + ] + }, + { + "cell_type": "code", + "execution_count": 153, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "24" + ] + }, + "execution_count": 153, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "np.max(arr) #same as arr.max()" + ] + }, + { + "cell_type": "code", + "execution_count": 154, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([ 0. 
, 0.84147098, 0.90929743, 0.14112001, -0.7568025 ,\n", + " -0.95892427, -0.2794155 , 0.6569866 , 0.98935825, 0.41211849,\n", + " -0.54402111, -0.99999021, -0.53657292, 0.42016704, 0.99060736,\n", + " 0.65028784, -0.28790332, -0.96139749, -0.75098725, 0.14987721,\n", + " 0.91294525, 0.83665564, -0.00885131, -0.8462204 , -0.90557836])" + ] + }, + "execution_count": 154, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "np.sin(arr)" + ] + }, + { + "cell_type": "code", + "execution_count": 155, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\pskj0\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:1: RuntimeWarning: divide by zero encountered in log\n", + " \"\"\"Entry point for launching an IPython kernel.\n" + ] + }, + { + "data": { + "text/plain": [ + "array([ -inf, 0. , 0.69314718, 1.09861229, 1.38629436,\n", + " 1.60943791, 1.79175947, 1.94591015, 2.07944154, 2.19722458,\n", + " 2.30258509, 2.39789527, 2.48490665, 2.56494936, 2.63905733,\n", + " 2.7080502 , 2.77258872, 2.83321334, 2.89037176, 2.94443898,\n", + " 2.99573227, 3.04452244, 3.09104245, 3.13549422, 3.17805383])" + ] + }, + "execution_count": 155, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "np.log(arr)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## There is a lot more of NumPy to explore\n", + "https://numpy.org/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.7" + } + }, + "nbformat": 4, + "nbformat_minor": 4
+} diff --git a/pandas/03.14 Pandas For Data Analysis.ipynb b/pandas/03.14 Pandas For Data Analysis.ipynb new file mode 100644 index 00000000..ca8dca15 --- /dev/null +++ b/pandas/03.14 Pandas For Data Analysis.ipynb @@ -0,0 +1,6589 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Pandas Introduction \n", + "\n", + "Pandas is a fast, powerful, flexible and easy to use open source data analysis and manipulation tool, built on top of the Python programming language.\n", + "\n", + "You can say pandas is an extremely powerful version of Excel.\n", + "\n", + "In this section we are going to talk about \n", + "\n", + "* Introduction To pandas \n", + "* Series \n", + "* DataFrames \n", + "* Missing Data\n", + "* Merging, Joining, and Concatenating \n", + "* Operations \n", + "* Data Input and Output " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Series " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "First, we are going to talk about the Series data type.\n", + "\n", + "A Series is very similar to a NumPy array; it is built on top of the NumPy array.\n", + "But a Series can have axis labels, meaning it can be indexed by labels instead of just a numeric location.\n", + "\n", + "Series is a one-dimensional labeled array capable of holding data of any type (integer, string, float, python objects, etc.). The axis labels are collectively called the index."
+ ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "# Lets import numpy and pandas \n", + "import numpy as np\n", + "import pandas as pd" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "# We can convert a list , numpy array , or dict to Series\n", + "\n", + "labels = ['Shivendra','Ragavendra','Narendra']\n", + "my_list= [21,25,30]\n", + "arr=np.array([10,20,30])\n", + "d={'Shivendra':21,'Raghavendra':25,'Narendra':30}" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 21\n", + "1 25\n", + "2 30\n", + "dtype: int64" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Using List \n", + "pd.Series(data=my_list)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Shivendra 21\n", + "Ragavendra 25\n", + "Narendra 30\n", + "dtype: int64" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.Series (data=my_list,index=labels )" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Shivendra 21\n", + "Ragavendra 25\n", + "Narendra 30\n", + "dtype: int64" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.Series(my_list,labels)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 10\n", + "1 20\n", + "2 30\n", + "dtype: int32" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# NumPy Array\n", + "pd.Series(arr)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": 
[ + { + "data": { + "text/plain": [ + "Shivendra 10\n", + "Ragavendra 20\n", + "Narendra 30\n", + "dtype: int32" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.Series (data=arr,index=labels )" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Shivendra 21\n", + "Raghavendra 25\n", + "Narendra 30\n", + "dtype: int64" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Dictionary\n", + "pd.Series (d)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Data In A Series \n", + "\n", + "A Pandas Series can hold a variety of objects." + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 Shivendra\n", + "1 Ragavendra\n", + "2 Narendra\n", + "dtype: object" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.Series (data=labels )" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Using an Index\n", + "\n", + "The key to using a Series is understanding its index. Pandas makes use of these index names or numbers by allowing for fast lookups of information (works like a hash table or dictionary).\n", + "\n", + "Let's see some examples of how to grab information from a Series.
Let us create two Series, ser1 and ser2:" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "ser1= pd.Series ([1,2,3,4], index =['Chennai','Bihar','West Bengal','Rajasthan'])" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Chennai 1\n", + "Bihar 2\n", + "West Bengal 3\n", + "Rajasthan 4\n", + "dtype: int64" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ser1" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "ser2=pd.Series ([1,2,5,4],index=['Chennai','Bihar','Assam','Rajasthan'])" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Chennai 1\n", + "Bihar 2\n", + "Assam 5\n", + "Rajasthan 4\n", + "dtype: int64" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ser2" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "1" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ser1['Chennai']" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Assam NaN\n", + "Bihar 4.0\n", + "Chennai 2.0\n", + "Rajasthan 8.0\n", + "West Bengal NaN\n", + "dtype: float64" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Operations are then also done based off of index:\n", + "ser1+ser2" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## DataFrames\n", + "A pandas DataFrame is a two-dimensional, size-mutable, potentially heterogeneous tabular data structure with labeled axes (rows
and columns). A Data frame is a two-dimensional data structure, i.e., data is aligned in a tabular fashion in rows and columns. Pandas DataFrame consists of three principal components, the data, rows, and columns.\n", + "\n", + "DataFrames are the workhorse of pandas and are directly inspired by the R programming language. We can think of a DataFrame as a bunch of Series objects put together to share the same index. Let's use pandas to explore this topic!" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "from numpy.random import randn\n", + "np.random.seed(101)" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [], + "source": [ + "df=pd.DataFrame (randn(5,5),index='Chennai Bihar UtterPredesh Delhi Mumbai'.split(),columns ='SRM NIT_PATNA BHU IIT_DELHI IIT_Bombay'.split())" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
SRMNIT_PATNABHUIIT_DELHIIIT_Bombay
Chennai2.7068500.6281330.9079690.5038260.651118
Bihar-0.319318-0.8480770.605965-2.0181680.740122
UtterPredesh0.528813-0.5890010.188695-0.758872-0.933237
Delhi0.9550570.1907941.9787572.6059670.683509
Mumbai0.3026651.693723-1.706086-1.159119-0.134841
\n", + "
" + ], + "text/plain": [ + " SRM NIT_PATNA BHU IIT_DELHI IIT_Bombay\n", + "Chennai 2.706850 0.628133 0.907969 0.503826 0.651118\n", + "Bihar -0.319318 -0.848077 0.605965 -2.018168 0.740122\n", + "UtterPredesh 0.528813 -0.589001 0.188695 -0.758872 -0.933237\n", + "Delhi 0.955057 0.190794 1.978757 2.605967 0.683509\n", + "Mumbai 0.302665 1.693723 -1.706086 -1.159119 -0.134841" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Selection and Indexing\n", + "\n", + "Let's learn the various methods to grab data from a DataFrame" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Chennai 2.706850\n", + "Bihar -0.319318\n", + "UtterPredesh 0.528813\n", + "Delhi 0.955057\n", + "Mumbai 0.302665\n", + "Name: SRM, dtype: float64" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df['SRM']" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
SRMBHU
Chennai2.7068500.907969
Bihar-0.3193180.605965
UtterPredesh0.5288130.188695
Delhi0.9550571.978757
Mumbai0.302665-1.706086
\n", + "
" + ], + "text/plain": [ + " SRM BHU\n", + "Chennai 2.706850 0.907969\n", + "Bihar -0.319318 0.605965\n", + "UtterPredesh 0.528813 0.188695\n", + "Delhi 0.955057 1.978757\n", + "Mumbai 0.302665 -1.706086" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# We can pass a list of column names \n", + "df[['SRM' , 'BHU']]" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Chennai 2.706850\n", + "Bihar -0.319318\n", + "UtterPredesh 0.528813\n", + "Delhi 0.955057\n", + "Mumbai 0.302665\n", + "Name: SRM, dtype: float64" + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.SRM # attribute-style column access (same result as df['SRM'])" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [], + "source": [ + "# Dataframe Columns are just Series" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "pandas.core.series.Series" + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "type(df['SRM'])" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": {}, + "outputs": [], + "source": [ + "# Creating a new column \n", + "df['UPES']=df['SRM']" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": {}, + "outputs": [], + "source": [ + "df['Harshita']=df['SRM'] + df['BHU']" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
SRMNIT_PATNABHUIIT_DELHIIIT_BombayUPESHarshita
Chennai2.7068500.6281330.9079690.5038260.6511182.7068503.614819
Bihar-0.319318-0.8480770.605965-2.0181680.740122-0.3193180.286647
UtterPredesh0.528813-0.5890010.188695-0.758872-0.9332370.5288130.717509
Delhi0.9550570.1907941.9787572.6059670.6835090.9550572.933814
Mumbai0.3026651.693723-1.706086-1.159119-0.1348410.302665-1.403420
\n", + "
" + ], + "text/plain": [ + " SRM NIT_PATNA BHU IIT_DELHI IIT_Bombay UPES \\\n", + "Chennai 2.706850 0.628133 0.907969 0.503826 0.651118 2.706850 \n", + "Bihar -0.319318 -0.848077 0.605965 -2.018168 0.740122 -0.319318 \n", + "UtterPredesh 0.528813 -0.589001 0.188695 -0.758872 -0.933237 0.528813 \n", + "Delhi 0.955057 0.190794 1.978757 2.605967 0.683509 0.955057 \n", + "Mumbai 0.302665 1.693723 -1.706086 -1.159119 -0.134841 0.302665 \n", + "\n", + " Harshita \n", + "Chennai 3.614819 \n", + "Bihar 0.286647 \n", + "UtterPredesh 0.717509 \n", + "Delhi 2.933814 \n", + "Mumbai -1.403420 " + ] + }, + "execution_count": 36, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
SRMNIT_PATNABHUIIT_DELHIIIT_Bombay
Chennai2.7068500.6281330.9079690.5038260.651118
Bihar-0.319318-0.8480770.605965-2.0181680.740122
UtterPredesh0.528813-0.5890010.188695-0.758872-0.933237
Delhi0.9550570.1907941.9787572.6059670.683509
Mumbai0.3026651.693723-1.706086-1.159119-0.134841
\n", + "
" + ], + "text/plain": [ + " SRM NIT_PATNA BHU IIT_DELHI IIT_Bombay\n", + "Chennai 2.706850 0.628133 0.907969 0.503826 0.651118\n", + "Bihar -0.319318 -0.848077 0.605965 -2.018168 0.740122\n", + "UtterPredesh 0.528813 -0.589001 0.188695 -0.758872 -0.933237\n", + "Delhi 0.955057 0.190794 1.978757 2.605967 0.683509\n", + "Mumbai 0.302665 1.693723 -1.706086 -1.159119 -0.134841" + ] + }, + "execution_count": 32, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.drop('UPES',axis=1) # Axis = 1 for column" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
SRMNIT_PATNABHUIIT_DELHIIIT_BombayUPES
Chennai2.7068500.6281330.9079690.5038260.6511182.706850
Bihar-0.319318-0.8480770.605965-2.0181680.740122-0.319318
UtterPredesh0.528813-0.5890010.188695-0.758872-0.9332370.528813
Delhi0.9550570.1907941.9787572.6059670.6835090.955057
Mumbai0.3026651.693723-1.706086-1.159119-0.1348410.302665
\n", + "
" + ], + "text/plain": [ + " SRM NIT_PATNA BHU IIT_DELHI IIT_Bombay UPES\n", + "Chennai 2.706850 0.628133 0.907969 0.503826 0.651118 2.706850\n", + "Bihar -0.319318 -0.848077 0.605965 -2.018168 0.740122 -0.319318\n", + "UtterPredesh 0.528813 -0.589001 0.188695 -0.758872 -0.933237 0.528813\n", + "Delhi 0.955057 0.190794 1.978757 2.605967 0.683509 0.955057\n", + "Mumbai 0.302665 1.693723 -1.706086 -1.159119 -0.134841 0.302665" + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df # The column is still there: drop() returned a new DataFrame, so use inplace=True (or reassign) to remove it permanently" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "metadata": {}, + "outputs": [], + "source": [ + "df.drop('UPES',axis=1,inplace =True )" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "metadata": {}, + "outputs": [], + "source": [ + "df.drop('Harshita',axis=1,inplace = True)" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
SRMNIT_PATNABHUIIT_DELHIIIT_Bombay
Chennai2.7068500.6281330.9079690.5038260.651118
Bihar-0.319318-0.8480770.605965-2.0181680.740122
UtterPredesh0.528813-0.5890010.188695-0.758872-0.933237
Delhi0.9550570.1907941.9787572.6059670.683509
Mumbai0.3026651.693723-1.706086-1.159119-0.134841
\n", + "
" + ], + "text/plain": [ + " SRM NIT_PATNA BHU IIT_DELHI IIT_Bombay\n", + "Chennai 2.706850 0.628133 0.907969 0.503826 0.651118\n", + "Bihar -0.319318 -0.848077 0.605965 -2.018168 0.740122\n", + "UtterPredesh 0.528813 -0.589001 0.188695 -0.758872 -0.933237\n", + "Delhi 0.955057 0.190794 1.978757 2.605967 0.683509\n", + "Mumbai 0.302665 1.693723 -1.706086 -1.159119 -0.134841" + ] + }, + "execution_count": 41, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
SRMNIT_PATNABHUIIT_DELHIIIT_Bombay
Chennai2.7068500.6281330.9079690.5038260.651118
Bihar-0.319318-0.8480770.605965-2.0181680.740122
UtterPredesh0.528813-0.5890010.188695-0.758872-0.933237
Mumbai0.3026651.693723-1.706086-1.159119-0.134841
\n", + "
" + ], + "text/plain": [ + " SRM NIT_PATNA BHU IIT_DELHI IIT_Bombay\n", + "Chennai 2.706850 0.628133 0.907969 0.503826 0.651118\n", + "Bihar -0.319318 -0.848077 0.605965 -2.018168 0.740122\n", + "UtterPredesh 0.528813 -0.589001 0.188695 -0.758872 -0.933237\n", + "Mumbai 0.302665 1.693723 -1.706086 -1.159119 -0.134841" + ] + }, + "execution_count": 44, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.drop('Delhi',axis=0)" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "SRM 2.706850\n", + "NIT_PATNA 0.628133\n", + "BHU 0.907969\n", + "IIT_DELHI 0.503826\n", + "IIT_Bombay 0.651118\n", + "Name: Chennai, dtype: float64" + ] + }, + "execution_count": 45, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.loc['Chennai']" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "SRM -0.319318\n", + "NIT_PATNA -0.848077\n", + "BHU 0.605965\n", + "IIT_DELHI -2.018168\n", + "IIT_Bombay 0.740122\n", + "Name: Bihar, dtype: float64" + ] + }, + "execution_count": 46, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# We can select based on indexing \n", + "df.iloc[1]" + ] + }, + { + "cell_type": "code", + "execution_count": 47, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "-0.8480769834036315" + ] + }, + "execution_count": 47, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.loc['Bihar','NIT_PATNA']" + ] + }, + { + "cell_type": "code", + "execution_count": 48, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
NIT_PATNAIIT_DELHI
Bihar-0.848077-2.018168
Mumbai1.693723-1.159119
\n", + "
" + ], + "text/plain": [ + " NIT_PATNA IIT_DELHI\n", + "Bihar -0.848077 -2.018168\n", + "Mumbai 1.693723 -1.159119" + ] + }, + "execution_count": 48, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.loc[['Bihar','Mumbai'],['NIT_PATNA','IIT_DELHI']]" + ] + }, + { + "cell_type": "code", + "execution_count": 49, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
SRMNIT_PATNABHUIIT_DELHIIIT_Bombay
Chennai2.7068500.6281330.9079690.5038260.651118
Bihar-0.319318-0.8480770.605965-2.0181680.740122
UtterPredesh0.528813-0.5890010.188695-0.758872-0.933237
Delhi0.9550570.1907941.9787572.6059670.683509
Mumbai0.3026651.693723-1.706086-1.159119-0.134841
\n", + "
" + ], + "text/plain": [ + " SRM NIT_PATNA BHU IIT_DELHI IIT_Bombay\n", + "Chennai 2.706850 0.628133 0.907969 0.503826 0.651118\n", + "Bihar -0.319318 -0.848077 0.605965 -2.018168 0.740122\n", + "UtterPredesh 0.528813 -0.589001 0.188695 -0.758872 -0.933237\n", + "Delhi 0.955057 0.190794 1.978757 2.605967 0.683509\n", + "Mumbai 0.302665 1.693723 -1.706086 -1.159119 -0.134841" + ] + }, + "execution_count": 49, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df" + ] + }, + { + "cell_type": "code", + "execution_count": 50, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
SRMNIT_PATNABHUIIT_DELHIIIT_Bombay
ChennaiTrueTrueTrueTrueTrue
BiharFalseFalseTrueFalseTrue
UtterPredeshTrueFalseTrueFalseFalse
DelhiTrueTrueTrueTrueTrue
MumbaiTrueTrueFalseFalseFalse
\n", + "
" + ], + "text/plain": [ + " SRM NIT_PATNA BHU IIT_DELHI IIT_Bombay\n", + "Chennai True True True True True\n", + "Bihar False False True False True\n", + "UtterPredesh True False True False False\n", + "Delhi True True True True True\n", + "Mumbai True True False False False" + ] + }, + "execution_count": 50, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df>0" + ] + }, + { + "cell_type": "code", + "execution_count": 51, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
SRMNIT_PATNABHUIIT_DELHIIIT_Bombay
Chennai2.7068500.6281330.9079690.5038260.651118
BiharNaNNaN0.605965NaN0.740122
UtterPredesh0.528813NaN0.188695NaNNaN
Delhi0.9550570.1907941.9787572.6059670.683509
Mumbai0.3026651.693723NaNNaNNaN
\n", + "
" + ], + "text/plain": [ + " SRM NIT_PATNA BHU IIT_DELHI IIT_Bombay\n", + "Chennai 2.706850 0.628133 0.907969 0.503826 0.651118\n", + "Bihar NaN NaN 0.605965 NaN 0.740122\n", + "UtterPredesh 0.528813 NaN 0.188695 NaN NaN\n", + "Delhi 0.955057 0.190794 1.978757 2.605967 0.683509\n", + "Mumbai 0.302665 1.693723 NaN NaN NaN" + ] + }, + "execution_count": 51, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df[df>0]" + ] + }, + { + "cell_type": "code", + "execution_count": 53, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
SRMNIT_PATNABHUIIT_DELHIIIT_Bombay
Chennai2.7068500.6281330.9079690.5038260.651118
UtterPredesh0.528813-0.5890010.188695-0.758872-0.933237
Delhi0.9550570.1907941.9787572.6059670.683509
Mumbai0.3026651.693723-1.706086-1.159119-0.134841
\n", + "
" + ], + "text/plain": [ + " SRM NIT_PATNA BHU IIT_DELHI IIT_Bombay\n", + "Chennai 2.706850 0.628133 0.907969 0.503826 0.651118\n", + "UtterPredesh 0.528813 -0.589001 0.188695 -0.758872 -0.933237\n", + "Delhi 0.955057 0.190794 1.978757 2.605967 0.683509\n", + "Mumbai 0.302665 1.693723 -1.706086 -1.159119 -0.134841" + ] + }, + "execution_count": 53, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df [df['SRM']>0] # Bihar is excluded because its SRM value is negative" + ] + }, + { + "cell_type": "code", + "execution_count": 55, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Chennai 0.907969\n", + "UtterPredesh 0.188695\n", + "Delhi 1.978757\n", + "Mumbai -1.706086\n", + "Name: BHU, dtype: float64" + ] + }, + "execution_count": 55, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df[df['SRM']>0]['BHU'] # BHU column for rows with SRM > 0; Bihar is excluded because its SRM value is negative" + ] + }, + { + "cell_type": "code", + "execution_count": 56, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Chennai 2.706850\n", + "Bihar -0.319318\n", + "UtterPredesh 0.528813\n", + "Delhi 0.955057\n", + "Name: SRM, dtype: float64" + ] + }, + "execution_count": 56, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df[df['BHU']>0]['SRM'] # Mumbai is excluded because its BHU value is negative" + ] + }, + { + "cell_type": "code", + "execution_count": 58, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
BHUIIT_DELHI
Chennai0.9079690.503826
UtterPredesh0.188695-0.758872
Delhi1.9787572.605967
Mumbai-1.706086-1.159119
\n", + "
" + ], + "text/plain": [ + " BHU IIT_DELHI\n", + "Chennai 0.907969 0.503826\n", + "UtterPredesh 0.188695 -0.758872\n", + "Delhi 1.978757 2.605967\n", + "Mumbai -1.706086 -1.159119" + ] + }, + "execution_count": 58, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df[df['SRM']>0][['BHU','IIT_DELHI']]" + ] + }, + { + "cell_type": "code", + "execution_count": 59, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
SRMNIT_PATNABHUIIT_DELHIIIT_Bombay
Chennai2.7068500.6281330.9079690.5038260.651118
Delhi0.9550570.1907941.9787572.6059670.683509
\n", + "
" + ], + "text/plain": [ + " SRM NIT_PATNA BHU IIT_DELHI IIT_Bombay\n", + "Chennai 2.706850 0.628133 0.907969 0.503826 0.651118\n", + "Delhi 0.955057 0.190794 1.978757 2.605967 0.683509" + ] + }, + "execution_count": 59, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Wrap each comparison in parentheses: & binds more tightly than >.\n", + "df[(df['SRM']>0.955) & (df['BHU']>0)]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## More Index Details\n", + "\n", + "Let's discuss some more features of indexing, including resetting the index or setting it to something else. We'll also talk about index hierarchy!" + ] + }, + { + "cell_type": "code", + "execution_count": 60, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
SRMNIT_PATNABHUIIT_DELHIIIT_Bombay
Chennai2.7068500.6281330.9079690.5038260.651118
Bihar-0.319318-0.8480770.605965-2.0181680.740122
UtterPredesh0.528813-0.5890010.188695-0.758872-0.933237
Delhi0.9550570.1907941.9787572.6059670.683509
Mumbai0.3026651.693723-1.706086-1.159119-0.134841
\n", + "
" + ], + "text/plain": [ + " SRM NIT_PATNA BHU IIT_DELHI IIT_Bombay\n", + "Chennai 2.706850 0.628133 0.907969 0.503826 0.651118\n", + "Bihar -0.319318 -0.848077 0.605965 -2.018168 0.740122\n", + "UtterPredesh 0.528813 -0.589001 0.188695 -0.758872 -0.933237\n", + "Delhi 0.955057 0.190794 1.978757 2.605967 0.683509\n", + "Mumbai 0.302665 1.693723 -1.706086 -1.159119 -0.134841" + ] + }, + "execution_count": 60, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df" + ] + }, + { + "cell_type": "code", + "execution_count": 62, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
indexSRMNIT_PATNABHUIIT_DELHIIIT_Bombay
0Chennai2.7068500.6281330.9079690.5038260.651118
1Bihar-0.319318-0.8480770.605965-2.0181680.740122
2UtterPredesh0.528813-0.5890010.188695-0.758872-0.933237
3Delhi0.9550570.1907941.9787572.6059670.683509
4Mumbai0.3026651.693723-1.706086-1.159119-0.134841
\n", + "
" + ], + "text/plain": [ + " index SRM NIT_PATNA BHU IIT_DELHI IIT_Bombay\n", + "0 Chennai 2.706850 0.628133 0.907969 0.503826 0.651118\n", + "1 Bihar -0.319318 -0.848077 0.605965 -2.018168 0.740122\n", + "2 UtterPredesh 0.528813 -0.589001 0.188695 -0.758872 -0.933237\n", + "3 Delhi 0.955057 0.190794 1.978757 2.605967 0.683509\n", + "4 Mumbai 0.302665 1.693723 -1.706086 -1.159119 -0.134841" + ] + }, + "execution_count": 62, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.reset_index()" + ] + }, + { + "cell_type": "code", + "execution_count": 64, + "metadata": {}, + "outputs": [], + "source": [ + "newind ='Tamil_Nadu BIHAR UP Delhi Maharastra'.split()" + ] + }, + { + "cell_type": "code", + "execution_count": 65, + "metadata": {}, + "outputs": [], + "source": [ + "df['States']=newind" + ] + }, + { + "cell_type": "code", + "execution_count": 66, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
SRMNIT_PATNABHUIIT_DELHIIIT_BombayStates
Chennai2.7068500.6281330.9079690.5038260.651118Tamil_Nadu
Bihar-0.319318-0.8480770.605965-2.0181680.740122BIHAR
UtterPredesh0.528813-0.5890010.188695-0.758872-0.933237UP
Delhi0.9550570.1907941.9787572.6059670.683509Delhi
Mumbai0.3026651.693723-1.706086-1.159119-0.134841Maharastra
\n", + "
" + ], + "text/plain": [ + " SRM NIT_PATNA BHU IIT_DELHI IIT_Bombay States\n", + "Chennai 2.706850 0.628133 0.907969 0.503826 0.651118 Tamil_Nadu\n", + "Bihar -0.319318 -0.848077 0.605965 -2.018168 0.740122 BIHAR\n", + "UtterPredesh 0.528813 -0.589001 0.188695 -0.758872 -0.933237 UP\n", + "Delhi 0.955057 0.190794 1.978757 2.605967 0.683509 Delhi\n", + "Mumbai 0.302665 1.693723 -1.706086 -1.159119 -0.134841 Maharastra" + ] + }, + "execution_count": 66, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df" + ] + }, + { + "cell_type": "code", + "execution_count": 67, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
SRMNIT_PATNABHUIIT_DELHIIIT_Bombay
States
Tamil_Nadu2.7068500.6281330.9079690.5038260.651118
BIHAR-0.319318-0.8480770.605965-2.0181680.740122
UP0.528813-0.5890010.188695-0.758872-0.933237
Delhi0.9550570.1907941.9787572.6059670.683509
Maharastra0.3026651.693723-1.706086-1.159119-0.134841
\n", + "
" + ], + "text/plain": [ + " SRM NIT_PATNA BHU IIT_DELHI IIT_Bombay\n", + "States \n", + "Tamil_Nadu 2.706850 0.628133 0.907969 0.503826 0.651118\n", + "BIHAR -0.319318 -0.848077 0.605965 -2.018168 0.740122\n", + "UP 0.528813 -0.589001 0.188695 -0.758872 -0.933237\n", + "Delhi 0.955057 0.190794 1.978757 2.605967 0.683509\n", + "Maharastra 0.302665 1.693723 -1.706086 -1.159119 -0.134841" + ] + }, + "execution_count": 67, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.set_index('States')" + ] + }, + { + "cell_type": "code", + "execution_count": 68, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
SRMNIT_PATNABHUIIT_DELHIIIT_BombayStates
Chennai2.7068500.6281330.9079690.5038260.651118Tamil_Nadu
Bihar-0.319318-0.8480770.605965-2.0181680.740122BIHAR
UtterPredesh0.528813-0.5890010.188695-0.758872-0.933237UP
Delhi0.9550570.1907941.9787572.6059670.683509Delhi
Mumbai0.3026651.693723-1.706086-1.159119-0.134841Maharastra
\n", + "
" + ], + "text/plain": [ + " SRM NIT_PATNA BHU IIT_DELHI IIT_Bombay States\n", + "Chennai 2.706850 0.628133 0.907969 0.503826 0.651118 Tamil_Nadu\n", + "Bihar -0.319318 -0.848077 0.605965 -2.018168 0.740122 BIHAR\n", + "UtterPredesh 0.528813 -0.589001 0.188695 -0.758872 -0.933237 UP\n", + "Delhi 0.955057 0.190794 1.978757 2.605967 0.683509 Delhi\n", + "Mumbai 0.302665 1.693723 -1.706086 -1.159119 -0.134841 Maharastra" + ] + }, + "execution_count": 68, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df" + ] + }, + { + "cell_type": "code", + "execution_count": 69, + "metadata": {}, + "outputs": [], + "source": [ + "df.set_index('States',inplace=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 70, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
SRMNIT_PATNABHUIIT_DELHIIIT_Bombay
States
Tamil_Nadu2.7068500.6281330.9079690.5038260.651118
BIHAR-0.319318-0.8480770.605965-2.0181680.740122
UP0.528813-0.5890010.188695-0.758872-0.933237
Delhi0.9550570.1907941.9787572.6059670.683509
Maharastra0.3026651.693723-1.706086-1.159119-0.134841
\n", + "
" + ], + "text/plain": [ + " SRM NIT_PATNA BHU IIT_DELHI IIT_Bombay\n", + "States \n", + "Tamil_Nadu 2.706850 0.628133 0.907969 0.503826 0.651118\n", + "BIHAR -0.319318 -0.848077 0.605965 -2.018168 0.740122\n", + "UP 0.528813 -0.589001 0.188695 -0.758872 -0.933237\n", + "Delhi 0.955057 0.190794 1.978757 2.605967 0.683509\n", + "Maharastra 0.302665 1.693723 -1.706086 -1.159119 -0.134841" + ] + }, + "execution_count": 70, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Multi-Index and Index Hierarchy\n", + "\n", + "Let us go over how to work with a Multi-Index. First, we'll create a quick example of what a Multi-Indexed DataFrame would look like:" + ] + }, + { + "cell_type": "code", + "execution_count": 71, + "metadata": {}, + "outputs": [], + "source": [ + "# index Levels \n", + "outside =['Big_data','Big_data','Big_data','AI','AI','AI']\n", + "inside =[1,2,3,1,2,3]\n", + "hier_index=list(zip(outside,inside))\n", + "hier_index=pd.MultiIndex.from_tuples(hier_index)" + ] + }, + { + "cell_type": "code", + "execution_count": 72, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "MultiIndex([('Big_data', 1),\n", + " ('Big_data', 2),\n", + " ('Big_data', 3),\n", + " ( 'AI', 1),\n", + " ( 'AI', 2),\n", + " ( 'AI', 3)],\n", + " )" + ] + }, + "execution_count": 72, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "hier_index" + ] + }, + { + "cell_type": "code", + "execution_count": 74, + "metadata": {}, + "outputs": [], + "source": [ + "df=pd.DataFrame(np.random.rand (6,2),index=hier_index,columns=['Core','volunteers'])" + ] + }, + { + "cell_type": "code", + "execution_count": 75, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Corevolunteers
Big_data10.7013710.487635
20.6806780.521548
30.0433970.223937
AI10.5752050.120434
20.5001170.138010
30.0528080.178277
\n", + "
" + ], + "text/plain": [ + " Core volunteers\n", + "Big_data 1 0.701371 0.487635\n", + " 2 0.680678 0.521548\n", + " 3 0.043397 0.223937\n", + "AI 1 0.575205 0.120434\n", + " 2 0.500117 0.138010\n", + " 3 0.052808 0.178277" + ] + }, + "execution_count": 75, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now let's show how to index this! For an index hierarchy on the rows, we use df.loc[]; if the hierarchy were on the columns axis, you would just use normal bracket notation df[]. Calling one level of the index returns the sub-DataFrame:" + ] + }, + { + "cell_type": "code", + "execution_count": 77, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Corevolunteers
10.7013710.487635
20.6806780.521548
30.0433970.223937
\n", + "
" + ], + "text/plain": [ + " Core volunteers\n", + "1 0.701371 0.487635\n", + "2 0.680678 0.521548\n", + "3 0.043397 0.223937" + ] + }, + "execution_count": 77, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.loc['Big_data']" + ] + }, + { + "cell_type": "code", + "execution_count": 78, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Core 0.701371\n", + "volunteers 0.487635\n", + "Name: 1, dtype: float64" + ] + }, + "execution_count": 78, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.loc['Big_data'].loc[1]" + ] + }, + { + "cell_type": "code", + "execution_count": 79, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "FrozenList([None, None])" + ] + }, + "execution_count": 79, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.index.names" + ] + }, + { + "cell_type": "code", + "execution_count": 80, + "metadata": {}, + "outputs": [], + "source": [ + "df.index.names=['Domain','S.NO']" + ] + }, + { + "cell_type": "code", + "execution_count": 81, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Corevolunteers
DomainS.NO
Big_data10.7013710.487635
20.6806780.521548
30.0433970.223937
AI10.5752050.120434
20.5001170.138010
30.0528080.178277
\n", + "
" + ], + "text/plain": [ + " Core volunteers\n", + "Domain S.NO \n", + "Big_data 1 0.701371 0.487635\n", + " 2 0.680678 0.521548\n", + " 3 0.043397 0.223937\n", + "AI 1 0.575205 0.120434\n", + " 2 0.500117 0.138010\n", + " 3 0.052808 0.178277" + ] + }, + "execution_count": 81, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df" + ] + }, + { + "cell_type": "code", + "execution_count": 82, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Corevolunteers
S.NO
10.7013710.487635
20.6806780.521548
30.0433970.223937
\n", + "
" + ], + "text/plain": [ + " Core volunteers\n", + "S.NO \n", + "1 0.701371 0.487635\n", + "2 0.680678 0.521548\n", + "3 0.043397 0.223937" + ] + }, + "execution_count": 82, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.xs('Big_data')" + ] + }, + { + "cell_type": "code", + "execution_count": 83, + "metadata": {}, + "outputs": [], + "source": [ + "weather_data = {\n", + " 'day': ['1/1/2017','1/2/2017','1/3/2017','1/4/2017','1/5/2017','1/6/2017'],\n", + " 'temperature': [32,35,28,24,32,31],\n", + " 'windspeed': [6,7,2,7,4,2],\n", + " 'event': ['Rain', 'Sunny', 'Snow','Snow','Rain', 'Sunny']\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": 84, + "metadata": {}, + "outputs": [], + "source": [ + "df=pd.DataFrame(weather_data)" + ] + }, + { + "cell_type": "code", + "execution_count": 85, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
daytemperaturewindspeedevent
01/1/2017326Rain
11/2/2017357Sunny
21/3/2017282Snow
31/4/2017247Snow
41/5/2017324Rain
51/6/2017312Sunny
\n", + "
" + ], + "text/plain": [ + " day temperature windspeed event\n", + "0 1/1/2017 32 6 Rain\n", + "1 1/2/2017 35 7 Sunny\n", + "2 1/3/2017 28 2 Snow\n", + "3 1/4/2017 24 7 Snow\n", + "4 1/5/2017 32 4 Rain\n", + "5 1/6/2017 31 2 Sunny" + ] + }, + "execution_count": 85, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df" + ] + }, + { + "cell_type": "code", + "execution_count": 86, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(6, 4)" + ] + }, + "execution_count": 86, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.shape # rows, columns = df.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 87, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
daytemperaturewindspeedevent
01/1/2017326Rain
11/2/2017357Sunny
21/3/2017282Snow
31/4/2017247Snow
41/5/2017324Rain
\n", + "
" + ], + "text/plain": [ + " day temperature windspeed event\n", + "0 1/1/2017 32 6 Rain\n", + "1 1/2/2017 35 7 Sunny\n", + "2 1/3/2017 28 2 Snow\n", + "3 1/4/2017 24 7 Snow\n", + "4 1/5/2017 32 4 Rain" + ] + }, + "execution_count": 87, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.head() # df.head(3)" + ] + }, + { + "cell_type": "code", + "execution_count": 88, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
daytemperaturewindspeedevent
11/2/2017357Sunny
21/3/2017282Snow
31/4/2017247Snow
41/5/2017324Rain
51/6/2017312Sunny
\n", + "
" + ], + "text/plain": [ + " day temperature windspeed event\n", + "1 1/2/2017 35 7 Sunny\n", + "2 1/3/2017 28 2 Snow\n", + "3 1/4/2017 24 7 Snow\n", + "4 1/5/2017 32 4 Rain\n", + "5 1/6/2017 31 2 Sunny" + ] + }, + "execution_count": 88, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.tail() # df.tail(2)" + ] + }, + { + "cell_type": "code", + "execution_count": 89, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
daytemperaturewindspeedevent
11/2/2017357Sunny
21/3/2017282Snow
\n", + "
" + ], + "text/plain": [ + " day temperature windspeed event\n", + "1 1/2/2017 35 7 Sunny\n", + "2 1/3/2017 28 2 Snow" + ] + }, + "execution_count": 89, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df[1:3]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Columns" + ] + }, + { + "cell_type": "code", + "execution_count": 90, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Index(['day', 'temperature', 'windspeed', 'event'], dtype='object')" + ] + }, + "execution_count": 90, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.columns" + ] + }, + { + "cell_type": "code", + "execution_count": 91, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 1/1/2017\n", + "1 1/2/2017\n", + "2 1/3/2017\n", + "3 1/4/2017\n", + "4 1/5/2017\n", + "5 1/6/2017\n", + "Name: day, dtype: object" + ] + }, + "execution_count": 91, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df['day']" + ] + }, + { + "cell_type": "code", + "execution_count": 92, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "pandas.core.series.Series" + ] + }, + "execution_count": 92, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "type(df['day'])" + ] + }, + { + "cell_type": "code", + "execution_count": 94, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
daytemperature
01/1/201732
11/2/201735
21/3/201728
31/4/201724
41/5/201732
51/6/201731
\n", + "
" + ], + "text/plain": [ + " day temperature\n", + "0 1/1/2017 32\n", + "1 1/2/2017 35\n", + "2 1/3/2017 28\n", + "3 1/4/2017 24\n", + "4 1/5/2017 32\n", + "5 1/6/2017 31" + ] + }, + "execution_count": 94, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df[['day','temperature']]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Operations On DataFrame" + ] + }, + { + "cell_type": "code", + "execution_count": 97, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "35" + ] + }, + "execution_count": 97, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df['temperature'].max()" + ] + }, + { + "cell_type": "code", + "execution_count": 98, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
daytemperaturewindspeedevent
11/2/2017357Sunny
\n", + "
" + ], + "text/plain": [ + " day temperature windspeed event\n", + "1 1/2/2017 35 7 Sunny" + ] + }, + "execution_count": 98, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df[df['temperature']>32]" + ] + }, + { + "cell_type": "code", + "execution_count": 101, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "1 1/2/2017\n", + "Name: day, dtype: object" + ] + }, + "execution_count": 101, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df['day'][df['temperature'] == df['temperature'].max()] # Kinda doing SQL in pandas" + ] + }, + { + "cell_type": "code", + "execution_count": 102, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "3.8297084310253524" + ] + }, + "execution_count": 102, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df['temperature'].std()" + ] + }, + { + "cell_type": "code", + "execution_count": 103, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'Sunny'" + ] + }, + "execution_count": 103, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df['event'].max() # But mean() won't work since data type is string" + ] + }, + { + "cell_type": "code", + "execution_count": 78, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
temperaturewindspeed
count6.0000006.000000
mean30.3333334.666667
std3.8297082.338090
min24.0000002.000000
25%28.7500002.500000
50%31.5000005.000000
75%32.0000006.750000
max35.0000007.000000
\n", + "
" + ], + "text/plain": [ + " temperature windspeed\n", + "count 6.000000 6.000000\n", + "mean 30.333333 4.666667\n", + "std 3.829708 2.338090\n", + "min 24.000000 2.000000\n", + "25% 28.750000 2.500000\n", + "50% 31.500000 5.000000\n", + "75% 32.000000 6.750000\n", + "max 35.000000 7.000000" + ] + }, + "execution_count": 78, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.describe()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Missing Data\n", + "\n", + "Let's show a few convenient methods to deal with Missing Data in pandas:" + ] + }, + { + "cell_type": "code", + "execution_count": 104, + "metadata": {}, + "outputs": [], + "source": [ + "df = pd.DataFrame({'A':[1,2,np.nan],\n", + " 'B':[5,np.nan,np.nan],\n", + " 'C':[1,2,3]})" + ] + }, + { + "cell_type": "code", + "execution_count": 105, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ABC
01.05.01
12.0NaN2
2NaNNaN3
\n", + "
" + ], + "text/plain": [ + " A B C\n", + "0 1.0 5.0 1\n", + "1 2.0 NaN 2\n", + "2 NaN NaN 3" + ] + }, + "execution_count": 105, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df" + ] + }, + { + "cell_type": "code", + "execution_count": 106, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ABC
01.05.01
\n", + "
" + ], + "text/plain": [ + " A B C\n", + "0 1.0 5.0 1" + ] + }, + "execution_count": 106, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.dropna()" + ] + }, + { + "cell_type": "code", + "execution_count": 107, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
C
01
12
23
\n", + "
" + ], + "text/plain": [ + " C\n", + "0 1\n", + "1 2\n", + "2 3" + ] + }, + "execution_count": 107, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.dropna(axis=1)" + ] + }, + { + "cell_type": "code", + "execution_count": 83, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ABC
01.05.01
12.0NaN2
\n", + "
" + ], + "text/plain": [ + " A B C\n", + "0 1.0 5.0 1\n", + "1 2.0 NaN 2" + ] + }, + "execution_count": 83, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.dropna(thresh=2)" + ] + }, + { + "cell_type": "code", + "execution_count": 108, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ABC
0151
12shivendra2
2shivendrashivendra3
\n", + "
" + ], + "text/plain": [ + " A B C\n", + "0 1 5 1\n", + "1 2 shivendra 2\n", + "2 shivendra shivendra 3" + ] + }, + "execution_count": 108, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.fillna(value='shivendra')" + ] + }, + { + "cell_type": "code", + "execution_count": 109, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 1.0\n", + "1 2.0\n", + "2 1.5\n", + "Name: A, dtype: float64" + ] + }, + "execution_count": 109, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df['A'].fillna(value=df['A'].mean())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Groupby\n", + "\n", + "The groupby method allows you to group rows of data together and call aggregate functions" + ] + }, + { + "cell_type": "code", + "execution_count": 86, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "# Create dataframe\n", + "data = {'Company':['GOOG','GOOG','MSFT','MSFT','FB','FB'],\n", + " 'Person':['Shivendra','Abhishek','Sowjanya','Manish','Mini','Satya'],\n", + " 'Sales':[200,120,340,124,243,350]}" + ] + }, + { + "cell_type": "code", + "execution_count": 87, + "metadata": {}, + "outputs": [], + "source": [ + "df = pd.DataFrame(data)" + ] + }, + { + "cell_type": "code", + "execution_count": 88, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
CompanyPersonSales
0GOOGShivendra200
1GOOGAbhishek120
2MSFTSowjanya340
3MSFTManish124
4FBMini243
5FBSatya350
\n", + "
" + ], + "text/plain": [ + " Company Person Sales\n", + "0 GOOG Shivendra 200\n", + "1 GOOG Abhishek 120\n", + "2 MSFT Sowjanya 340\n", + "3 MSFT Manish 124\n", + "4 FB Mini 243\n", + "5 FB Satya 350" + ] + }, + "execution_count": 88, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Now you can use the .groupby() method to group rows together based off of a column name. For instance let's group based off of Company. This will create a DataFrameGroupBy object:**" + ] + }, + { + "cell_type": "code", + "execution_count": 89, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 89, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.groupby('Company')" + ] + }, + { + "cell_type": "code", + "execution_count": 90, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Sales
Company
FB296.5
GOOG160.0
MSFT232.0
\n", + "
" + ], + "text/plain": [ + " Sales\n", + "Company \n", + "FB 296.5\n", + "GOOG 160.0\n", + "MSFT 232.0" + ] + }, + "execution_count": 90, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#You can save this object as a new variable:\n", + "by_comp = df.groupby(\"Company\")\n", + "#And then call aggregate methods off the object:\n", + "by_comp.mean()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 91, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Sales
Company
FB296.5
GOOG160.0
MSFT232.0
\n", + "
" + ], + "text/plain": [ + " Sales\n", + "Company \n", + "FB 296.5\n", + "GOOG 160.0\n", + "MSFT 232.0" + ] + }, + "execution_count": 91, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.groupby('Company').mean()" + ] + }, + { + "cell_type": "code", + "execution_count": 92, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Sales
Company
FB75.660426
GOOG56.568542
MSFT152.735065
\n", + "
" + ], + "text/plain": [ + " Sales\n", + "Company \n", + "FB 75.660426\n", + "GOOG 56.568542\n", + "MSFT 152.735065" + ] + }, + "execution_count": 92, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#More examples of aggregate methods:\n", + "by_comp.std()" + ] + }, + { + "cell_type": "code", + "execution_count": 93, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
PersonSales
Company
FBSatya350
GOOGShivendra200
MSFTSowjanya340
\n", + "
" + ], + "text/plain": [ + " Person Sales\n", + "Company \n", + "FB Satya 350\n", + "GOOG Shivendra 200\n", + "MSFT Sowjanya 340" + ] + }, + "execution_count": 93, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "by_comp.max()" + ] + }, + { + "cell_type": "code", + "execution_count": 94, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
PersonSales
Company
FBMini243
GOOGAbhishek120
MSFTManish124
\n", + "
" + ], + "text/plain": [ + " Person Sales\n", + "Company \n", + "FB Mini 243\n", + "GOOG Abhishek 120\n", + "MSFT Manish 124" + ] + }, + "execution_count": 94, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "by_comp.min()" + ] + }, + { + "cell_type": "code", + "execution_count": 95, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Sales
countmeanstdmin25%50%75%max
Company
FB2.0296.575.660426243.0269.75296.5323.25350.0
GOOG2.0160.056.568542120.0140.00160.0180.00200.0
MSFT2.0232.0152.735065124.0178.00232.0286.00340.0
\n", + "
" + ], + "text/plain": [ + " Sales \n", + " count mean std min 25% 50% 75% max\n", + "Company \n", + "FB 2.0 296.5 75.660426 243.0 269.75 296.5 323.25 350.0\n", + "GOOG 2.0 160.0 56.568542 120.0 140.00 160.0 180.00 200.0\n", + "MSFT 2.0 232.0 152.735065 124.0 178.00 232.0 286.00 340.0" + ] + }, + "execution_count": 95, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "by_comp.describe()" + ] + }, + { + "cell_type": "code", + "execution_count": 96, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
CompanyFBGOOGMSFT
Salescount2.0000002.0000002.000000
mean296.500000160.000000232.000000
std75.66042656.568542152.735065
min243.000000120.000000124.000000
25%269.750000140.000000178.000000
50%296.500000160.000000232.000000
75%323.250000180.000000286.000000
max350.000000200.000000340.000000
\n", + "
" + ], + "text/plain": [ + "Company FB GOOG MSFT\n", + "Sales count 2.000000 2.000000 2.000000\n", + " mean 296.500000 160.000000 232.000000\n", + " std 75.660426 56.568542 152.735065\n", + " min 243.000000 120.000000 124.000000\n", + " 25% 269.750000 140.000000 178.000000\n", + " 50% 296.500000 160.000000 232.000000\n", + " 75% 323.250000 180.000000 286.000000\n", + " max 350.000000 200.000000 340.000000" + ] + }, + "execution_count": 96, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "by_comp.describe().transpose()" + ] + }, + { + "cell_type": "code", + "execution_count": 97, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Sales count 2.000000\n", + " mean 160.000000\n", + " std 56.568542\n", + " min 120.000000\n", + " 25% 140.000000\n", + " 50% 160.000000\n", + " 75% 180.000000\n", + " max 200.000000\n", + "Name: GOOG, dtype: float64" + ] + }, + "execution_count": 97, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "by_comp.describe().transpose()['GOOG']" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Merging, Joining, and Concatenating\n", + "\n", + "There are 3 main ways of combining DataFrames together: Merging, Joining and Concatenating. 
In this section we will discuss these 3 methods with examples.\n", + "\n", + "____" + ] + }, + { + "cell_type": "code", + "execution_count": 98, + "metadata": {}, + "outputs": [], + "source": [ + "df1 = pd.DataFrame({'A': ['A0', 'A1', 'A2', 'A3'],\n", + " 'B': ['B0', 'B1', 'B2', 'B3'],\n", + " 'C': ['C0', 'C1', 'C2', 'C3'],\n", + " 'D': ['D0', 'D1', 'D2', 'D3']},\n", + " index=[0, 1, 2, 3])" + ] + }, + { + "cell_type": "code", + "execution_count": 99, + "metadata": {}, + "outputs": [], + "source": [ + "df2 = pd.DataFrame({'A': ['A4', 'A5', 'A6', 'A7'],\n", + " 'B': ['B4', 'B5', 'B6', 'B7'],\n", + " 'C': ['C4', 'C5', 'C6', 'C7'],\n", + " 'D': ['D4', 'D5', 'D6', 'D7']},\n", + " index=[4, 5, 6, 7]) " + ] + }, + { + "cell_type": "code", + "execution_count": 100, + "metadata": {}, + "outputs": [], + "source": [ + "df3 = pd.DataFrame({'A': ['A8', 'A9', 'A10', 'A11'],\n", + " 'B': ['B8', 'B9', 'B10', 'B11'],\n", + " 'C': ['C8', 'C9', 'C10', 'C11'],\n", + " 'D': ['D8', 'D9', 'D10', 'D11']},\n", + " index=[8, 9, 10, 11])" + ] + }, + { + "cell_type": "code", + "execution_count": 101, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ABCD
0A0B0C0D0
1A1B1C1D1
2A2B2C2D2
3A3B3C3D3
\n", + "
" + ], + "text/plain": [ + " A B C D\n", + "0 A0 B0 C0 D0\n", + "1 A1 B1 C1 D1\n", + "2 A2 B2 C2 D2\n", + "3 A3 B3 C3 D3" + ] + }, + "execution_count": 101, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df1" + ] + }, + { + "cell_type": "code", + "execution_count": 102, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ABCD
4A4B4C4D4
5A5B5C5D5
6A6B6C6D6
7A7B7C7D7
\n", + "
" + ], + "text/plain": [ + " A B C D\n", + "4 A4 B4 C4 D4\n", + "5 A5 B5 C5 D5\n", + "6 A6 B6 C6 D6\n", + "7 A7 B7 C7 D7" + ] + }, + "execution_count": 102, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df2" + ] + }, + { + "cell_type": "code", + "execution_count": 103, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ABCD
8A8B8C8D8
9A9B9C9D9
10A10B10C10D10
11A11B11C11D11
\n", + "
" + ], + "text/plain": [ + " A B C D\n", + "8 A8 B8 C8 D8\n", + "9 A9 B9 C9 D9\n", + "10 A10 B10 C10 D10\n", + "11 A11 B11 C11 D11" + ] + }, + "execution_count": 103, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df3" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Concatenation\n", + "\n", + "Concatenation basically glues together DataFrames. Keep in mind that dimensions should match along the axis you are concatenating on. You can use **pd.concat** and pass in a list of DataFrames to concatenate together:" + ] + }, + { + "cell_type": "code", + "execution_count": 104, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ABCD
0A0B0C0D0
1A1B1C1D1
2A2B2C2D2
3A3B3C3D3
4A4B4C4D4
5A5B5C5D5
6A6B6C6D6
7A7B7C7D7
8A8B8C8D8
9A9B9C9D9
10A10B10C10D10
11A11B11C11D11
\n", + "
" + ], + "text/plain": [ + " A B C D\n", + "0 A0 B0 C0 D0\n", + "1 A1 B1 C1 D1\n", + "2 A2 B2 C2 D2\n", + "3 A3 B3 C3 D3\n", + "4 A4 B4 C4 D4\n", + "5 A5 B5 C5 D5\n", + "6 A6 B6 C6 D6\n", + "7 A7 B7 C7 D7\n", + "8 A8 B8 C8 D8\n", + "9 A9 B9 C9 D9\n", + "10 A10 B10 C10 D10\n", + "11 A11 B11 C11 D11" + ] + }, + "execution_count": 104, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.concat([df1,df2,df3])" + ] + }, + { + "cell_type": "code", + "execution_count": 105, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ABCDABCDABCD
0A0B0C0D0NaNNaNNaNNaNNaNNaNNaNNaN
1A1B1C1D1NaNNaNNaNNaNNaNNaNNaNNaN
2A2B2C2D2NaNNaNNaNNaNNaNNaNNaNNaN
3A3B3C3D3NaNNaNNaNNaNNaNNaNNaNNaN
4NaNNaNNaNNaNA4B4C4D4NaNNaNNaNNaN
5NaNNaNNaNNaNA5B5C5D5NaNNaNNaNNaN
6NaNNaNNaNNaNA6B6C6D6NaNNaNNaNNaN
7NaNNaNNaNNaNA7B7C7D7NaNNaNNaNNaN
8NaNNaNNaNNaNNaNNaNNaNNaNA8B8C8D8
9NaNNaNNaNNaNNaNNaNNaNNaNA9B9C9D9
10NaNNaNNaNNaNNaNNaNNaNNaNA10B10C10D10
11NaNNaNNaNNaNNaNNaNNaNNaNA11B11C11D11
\n", + "
" + ], + "text/plain": [ + " A B C D A B C D A B C D\n", + "0 A0 B0 C0 D0 NaN NaN NaN NaN NaN NaN NaN NaN\n", + "1 A1 B1 C1 D1 NaN NaN NaN NaN NaN NaN NaN NaN\n", + "2 A2 B2 C2 D2 NaN NaN NaN NaN NaN NaN NaN NaN\n", + "3 A3 B3 C3 D3 NaN NaN NaN NaN NaN NaN NaN NaN\n", + "4 NaN NaN NaN NaN A4 B4 C4 D4 NaN NaN NaN NaN\n", + "5 NaN NaN NaN NaN A5 B5 C5 D5 NaN NaN NaN NaN\n", + "6 NaN NaN NaN NaN A6 B6 C6 D6 NaN NaN NaN NaN\n", + "7 NaN NaN NaN NaN A7 B7 C7 D7 NaN NaN NaN NaN\n", + "8 NaN NaN NaN NaN NaN NaN NaN NaN A8 B8 C8 D8\n", + "9 NaN NaN NaN NaN NaN NaN NaN NaN A9 B9 C9 D9\n", + "10 NaN NaN NaN NaN NaN NaN NaN NaN A10 B10 C10 D10\n", + "11 NaN NaN NaN NaN NaN NaN NaN NaN A11 B11 C11 D11" + ] + }, + "execution_count": 105, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.concat([df1,df2,df3],axis=1)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Operations\n", + "\n", + "There are lots of operations with pandas that will be really useful to you, but don't fall into any distinct category. Let's show them here in this lecture:" + ] + }, + { + "cell_type": "code", + "execution_count": 106, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
col1col2col3
01444abc
12555def
23666ghi
34444xyz
\n", + "
" + ], + "text/plain": [ + " col1 col2 col3\n", + "0 1 444 abc\n", + "1 2 555 def\n", + "2 3 666 ghi\n", + "3 4 444 xyz" + ] + }, + "execution_count": 106, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = pd.DataFrame({'col1':[1,2,3,4],'col2':[444,555,666,444],'col3':['abc','def','ghi','xyz']})\n", + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 107, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([444, 555, 666], dtype=int64)" + ] + }, + "execution_count": 107, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df['col2'].unique()" + ] + }, + { + "cell_type": "code", + "execution_count": 108, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "3" + ] + }, + "execution_count": 108, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df['col2'].nunique()" + ] + }, + { + "cell_type": "code", + "execution_count": 109, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "444 2\n", + "555 1\n", + "666 1\n", + "Name: col2, dtype: int64" + ] + }, + "execution_count": 109, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df['col2'].value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": 110, + "metadata": {}, + "outputs": [], + "source": [ + "#Select from DataFrame using criteria from multiple columns\n", + "newdf = df[(df['col1']>2) & (df['col2']==444)]" + ] + }, + { + "cell_type": "code", + "execution_count": 111, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
col1col2col3
34444xyz
\n", + "
" + ], + "text/plain": [ + " col1 col2 col3\n", + "3 4 444 xyz" + ] + }, + "execution_count": 111, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "newdf" + ] + }, + { + "cell_type": "code", + "execution_count": 112, + "metadata": {}, + "outputs": [], + "source": [ + "# Applying Functions\n", + "def times2(x):\n", + " return x*2" + ] + }, + { + "cell_type": "code", + "execution_count": 113, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 2\n", + "1 4\n", + "2 6\n", + "3 8\n", + "Name: col1, dtype: int64" + ] + }, + "execution_count": 113, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df['col1'].apply(times2)" + ] + }, + { + "cell_type": "code", + "execution_count": 114, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 3\n", + "1 3\n", + "2 3\n", + "3 3\n", + "Name: col3, dtype: int64" + ] + }, + "execution_count": 114, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df['col3'].apply(len)" + ] + }, + { + "cell_type": "code", + "execution_count": 115, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "10" + ] + }, + "execution_count": 115, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df['col1'].sum()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Permanently Removing a Column**" + ] + }, + { + "cell_type": "code", + "execution_count": 116, + "metadata": {}, + "outputs": [], + "source": [ + "del df['col1']" + ] + }, + { + "cell_type": "code", + "execution_count": 117, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
col2col3
0444abc
1555def
2666ghi
3444xyz
\n", + "
" + ], + "text/plain": [ + " col2 col3\n", + "0 444 abc\n", + "1 555 def\n", + "2 666 ghi\n", + "3 444 xyz" + ] + }, + "execution_count": 117, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df" + ] + }, + { + "cell_type": "code", + "execution_count": 118, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Index(['col2', 'col3'], dtype='object')" + ] + }, + "execution_count": 118, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# get columns and index names \n", + "df.columns " + ] + }, + { + "cell_type": "code", + "execution_count": 119, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "RangeIndex(start=0, stop=4, step=1)" + ] + }, + "execution_count": 119, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.index" + ] + }, + { + "cell_type": "code", + "execution_count": 120, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
col2col3
0444abc
1555def
2666ghi
3444xyz
\n", + "
" + ], + "text/plain": [ + " col2 col3\n", + "0 444 abc\n", + "1 555 def\n", + "2 666 ghi\n", + "3 444 xyz" + ] + }, + "execution_count": 120, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df" + ] + }, + { + "cell_type": "code", + "execution_count": 121, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
col2col3
0444abc
3444xyz
1555def
2666ghi
\n", + "
" + ], + "text/plain": [ + " col2 col3\n", + "0 444 abc\n", + "3 444 xyz\n", + "1 555 def\n", + "2 666 ghi" + ] + }, + "execution_count": 121, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.sort_values(by='col2') #inplace=False by default" + ] + }, + { + "cell_type": "code", + "execution_count": 123, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
col2col3
0FalseFalse
1FalseFalse
2FalseFalse
3FalseFalse
\n", + "
" + ], + "text/plain": [ + " col2 col3\n", + "0 False False\n", + "1 False False\n", + "2 False False\n", + "3 False False" + ] + }, + "execution_count": 123, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Check whether there are any null values\n", + "df.isnull()" + ] + }, + { + "cell_type": "code", + "execution_count": 124, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
col2col3
0444abc
1555def
2666ghi
3444xyz
\n", + "
" + ], + "text/plain": [ + " col2 col3\n", + "0 444 abc\n", + "1 555 def\n", + "2 666 ghi\n", + "3 444 xyz" + ] + }, + "execution_count": 124, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Drop rows with NaN Values\n", + "df.dropna()" + ] + }, + { + "cell_type": "code", + "execution_count": 125, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
col1col2col3
01.0NaNabc
12.0555.0def
23.0666.0ghi
3NaN444.0xyz
\n", + "
" + ], + "text/plain": [ + " col1 col2 col3\n", + "0 1.0 NaN abc\n", + "1 2.0 555.0 def\n", + "2 3.0 666.0 ghi\n", + "3 NaN 444.0 xyz" + ] + }, + "execution_count": 125, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = pd.DataFrame({'col1':[1,2,3,np.nan],\n", + " 'col2':[np.nan,555,666,444],\n", + " 'col3':['abc','def','ghi','xyz']})\n", + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 126, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
col1col2col3
01FILLabc
12555def
23666ghi
3FILL444xyz
\n", + "
" + ], + "text/plain": [ + " col1 col2 col3\n", + "0 1 FILL abc\n", + "1 2 555 def\n", + "2 3 666 ghi\n", + "3 FILL 444 xyz" + ] + }, + "execution_count": 126, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.fillna('FILL')" + ] + }, + { + "cell_type": "code", + "execution_count": 127, + "metadata": {}, + "outputs": [], + "source": [ + "data = {'A':['foo','foo','foo','bar','bar','bar'],\n", + " 'B':['one','one','two','two','one','one'],\n", + " 'C':['x','y','x','y','x','y'],\n", + " 'D':[1,3,2,5,4,1]}\n", + "\n", + "df = pd.DataFrame(data)" + ] + }, + { + "cell_type": "code", + "execution_count": 128, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ABCD
0fooonex1
1foooney3
2footwox2
3bartwoy5
4baronex4
5baroney1
\n", + "
" + ], + "text/plain": [ + " A B C D\n", + "0 foo one x 1\n", + "1 foo one y 3\n", + "2 foo two x 2\n", + "3 bar two y 5\n", + "4 bar one x 4\n", + "5 bar one y 1" + ] + }, + "execution_count": 128, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Great" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.7" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +}