Skip to content

Commit

Permalink
black format
Browse files Browse the repository at this point in the history
  • Loading branch information
weijie-chen committed Jun 9, 2024
1 parent be89d29 commit e87aa7b
Show file tree
Hide file tree
Showing 7 changed files with 1,314 additions and 801 deletions.
69 changes: 39 additions & 30 deletions Chapter 1 - Descriptive Statistics.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,8 @@
"import matplotlib.pyplot as plt\n",
"import scipy as sp\n",
"from scipy import stats\n",
"plt.style.use('fivethirtyeight')\n",
"\n",
"plt.style.use(\"fivethirtyeight\")\n",
"import pandas as pd"
]
},
Expand Down Expand Up @@ -106,8 +107,8 @@
"rollings = np.random.randint(1, 7, 1000)\n",
"\n",
"fig, ax = plt.subplots(figsize=(9, 9))\n",
"n, bins, patches = ax.hist(rollings, bins = 6)\n",
"ax.set_title('Frequency Histogram of 1000 Times of Rolling a Dice', size = 19)\n",
"n, bins, patches = ax.hist(rollings, bins=6)\n",
"ax.set_title(\"Frequency Histogram of 1000 Times of Rolling a Dice\", size=19)\n",
"ax.set_xlim(0, 7)\n",
"ax.set_ylim(0, 400)\n",
"plt.show()"
Expand Down Expand Up @@ -139,7 +140,7 @@
"source": [
"x = np.random.randn(1000)\n",
"fig, ax = plt.subplots(figsize=(9, 9))\n",
"n, bins, patches = ax.hist(x, bins = 50, density=True)"
"n, bins, patches = ax.hist(x, bins=50, density=True)"
]
},
{
Expand Down Expand Up @@ -168,7 +169,7 @@
}
],
"source": [
"fig, ax = plt.subplots(nrows = 2, ncols = 1,figsize=(9, 9))\n",
"fig, ax = plt.subplots(nrows=2, ncols=1, figsize=(9, 9))\n",
"ax[0].plot(bins[:50], n)\n",
"ax[1].plot(np.cumsum(n))\n",
"plt.show()"
Expand Down Expand Up @@ -374,7 +375,7 @@
}
],
"source": [
"q75, q25 = np.percentile(x, [75 ,25]) # IQR\n",
"q75, q25 = np.percentile(x, [75, 25]) # IQR\n",
"q75 - q25"
]
},
Expand Down Expand Up @@ -441,8 +442,8 @@
}
],
"source": [
"sample_height = np.random.choice(population_height, size = 100)\n",
"np.var(sample_height, ddof = 1)"
"sample_height = np.random.choice(population_height, size=100)\n",
"np.var(sample_height, ddof=1)"
]
},
{
Expand Down Expand Up @@ -473,13 +474,18 @@
"source": [
"sample_height_array = []\n",
"for i in range(10000):\n",
" sample_height = np.random.choice(population_height, size = 100)\n",
" sample_height = np.random.choice(population_height, size=100)\n",
" sample_height_array.append(np.var(sample_height, ddof=1))\n",
"fig, ax = plt.subplots(figsize=(9, 9))\n",
"n, bins, patches = ax.hist(sample_height_array, bins = 50)\n",
"ax.axvline(x=np.mean(sample_height_array), color = 'tomato')\n",
"ax.text(np.mean(sample_height_array)+1, np.max(n), r'$\\mu_\\sigma = {:.2f}$'.format(np.mean(sample_height_array)), size = 16)\n",
"ax.set_title('Sampling Distribution of Variance Estimates', size = 19)\n",
"n, bins, patches = ax.hist(sample_height_array, bins=50)\n",
"ax.axvline(x=np.mean(sample_height_array), color=\"tomato\")\n",
"ax.text(\n",
" np.mean(sample_height_array) + 1,\n",
" np.max(n),\n",
" r\"$\\mu_\\sigma = {:.2f}$\".format(np.mean(sample_height_array)),\n",
" size=16,\n",
")\n",
"ax.set_title(\"Sampling Distribution of Variance Estimates\", size=19)\n",
"plt.show()"
]
},
Expand Down Expand Up @@ -577,7 +583,7 @@
],
"source": [
"x = np.random.randn(10)\n",
"z = (x - np.mean(x))/np.std(x)\n",
"z = (x - np.mean(x)) / np.std(x)\n",
"np.round(z, 2)"
]
},
Expand Down Expand Up @@ -607,8 +613,8 @@
"metadata": {},
"outputs": [],
"source": [
"z_l = (166 - 174)/4 # lower z-score\n",
"z_u = (182 - 174)/4 # upper z-score"
"z_l = (166 - 174) / 4 # lower z-score\n",
"z_u = (182 - 174) / 4 # upper z-score"
]
},
{
Expand All @@ -632,8 +638,8 @@
}
],
"source": [
"p = 1 - 1/z_l**2\n",
"print('At least {0}% of people are within 168cm and 182cm in Helsinki.'.format(p*100))"
"p = 1 - 1 / z_l**2\n",
"print(\"At least {0}% of people are within 166cm and 182cm in Helsinki.\".format(p * 100))"
]
},
{
Expand Down Expand Up @@ -663,19 +669,20 @@
],
"source": [
"def chebyshev(z):\n",
" return 1 - 1/z**2\n",
" return 1 - 1 / z**2\n",
"\n",
"\n",
"chebyshev_array = []\n",
"for z in np.arange(1, 21, 0.5):\n",
" chebyshev_array.append(chebyshev(z))\n",
"\n",
"fig, ax = plt.subplots(figsize=(9, 9))\n",
"ax.plot(np.arange(1, 21, 0.5), chebyshev_array)\n",
"ax.scatter(2.5, chebyshev(2.5), s = 100, color = 'red', zorder = 3)\n",
"ax.text(2.5+.5, chebyshev(2.5), r'(2.5, {}%)'.format(chebyshev(2.5)*100))\n",
"ax.scatter(2.5, chebyshev(2.5), s=100, color=\"red\", zorder=3)\n",
"ax.text(2.5 + 0.5, chebyshev(2.5), r\"(2.5, {}%)\".format(chebyshev(2.5) * 100))\n",
"ax.set_title(\"Chebyshev's Theorem\")\n",
"ax.set_xlabel('z-score')\n",
"ax.set_ylabel('Probability')\n",
"ax.set_xlabel(\"z-score\")\n",
"ax.set_ylabel(\"Probability\")\n",
"plt.show()"
]
},
Expand Down Expand Up @@ -785,6 +792,7 @@
],
"source": [
"import plot_material\n",
"\n",
"plot_material.reg_corr_plot()"
]
},
Expand Down Expand Up @@ -827,11 +835,12 @@
],
"source": [
"X = np.linspace(-10, 10, 200)\n",
"Y = 1/(1+np.exp(-X))\n",
"df_dict = {'X': X, 'Y': Y}\n",
"Y = 1 / (1 + np.exp(-X))\n",
"df_dict = {\"X\": X, \"Y\": Y}\n",
"df = pd.DataFrame(df_dict)\n",
"\n",
"df.plot(x ='X', y ='Y', kind='scatter', figsize=(16, 7)); plt.show()"
"df.plot(x=\"X\", y=\"Y\", kind=\"scatter\", figsize=(16, 7))\n",
"plt.show()"
]
},
{
Expand Down Expand Up @@ -891,7 +900,7 @@
}
],
"source": [
"df.corr(method='pearson')"
"df.corr(method=\"pearson\")"
]
},
{
Expand All @@ -908,7 +917,7 @@
}
],
"source": [
"print('Pearson coeffcient: {}'.format(sp.stats.stats.pearsonr(df['X'], df['Y'])[0]))"
"print(\"Pearson coefficient: {}\".format(sp.stats.stats.pearsonr(df[\"X\"], df[\"Y\"])[0]))"
]
},
{
Expand All @@ -925,7 +934,7 @@
}
],
"source": [
"print('Pearson coeffcient: {}'.format(sp.stats.stats.spearmanr(df['X'], df['Y'])[0]))"
"print(\"Spearman coefficient: {}\".format(sp.stats.stats.spearmanr(df[\"X\"], df[\"Y\"])[0]))"
]
},
{
Expand All @@ -943,7 +952,7 @@
],
"source": [
"sp.stats.stats.kendalltau(X, Y)\n",
"print('Pearson coeffcient: {}'.format(sp.stats.stats.kendalltau(df['X'], df['Y'])[0]))"
"print(\"Kendall tau coefficient: {}\".format(sp.stats.stats.kendalltau(df[\"X\"], df[\"Y\"])[0]))"
]
},
{
Expand Down
Loading

0 comments on commit e87aa7b

Please sign in to comment.