Skip to content

Latest commit

 

History

History
111 lines (81 loc) · 3.11 KB

File metadata and controls

111 lines (81 loc) · 3.11 KB

One-Way ANOVA


import pandas as pd

# read data from etf_returns.csv.
etf_returns_df = pd.read_csv('etf_returns.csv')

# print etf returns data set.
print(etf_returns_df)
    financial  energy  technology
0         5.5     5.2         7.3
1         7.1     7.4         8.2
2         6.9     6.6         7.1
3         5.1     5.7         7.6
4         4.6     5.6         8.2
5         5.3     5.5        11.5
6         5.9     6.4         9.2
7         5.6     6.1         9.5
8         5.5     5.2         7.3
9         7.1     7.4         8.2
10        6.9     6.6         7.1
11        5.1     5.7         7.6
12        4.6     5.6         8.2
13        5.3     5.5        11.5
14        5.9     6.4         9.2
15        5.6     6.1         9.5
16        4.7     4.4         6.2
17        6.4     6.6         7.4
18        6.7     6.4         6.9
19        4.3     4.8         6.4
20        4.1     5.0         7.4
21        5.1     5.3        11.1
22        5.7     6.2         8.9
23        4.7     5.2         8.1
24        5.3     5.0         7.1
25        6.4     6.6         7.4
26        5.8     5.6         6.0
27        4.9     5.5         7.4
28        4.1     5.0         7.4
29        4.8     4.9        10.3
import scipy.stats as st

# save return data for individual sectors for input to f_oneway method.
etf_returns_financial = etf_returns_df['financial']
etf_returns_energy = etf_returns_df['energy']
etf_returns_technology = etf_returns_df['technology']

# print the outputs: the test statistic and the P-value.
test_statistic, p_value = st.f_oneway(etf_returns_financial, etf_returns_energy, etf_returns_technology)

print("test statistic =", round(test_statistic,2))
print("P-value =", round(p_value,4))
test statistic = 55.07
P-value = 0.0
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import random

# side-by-side boxplots require the three dataframes to be concatenated and a require variable identifying the type of ETF.
etf_returns_financial_df = etf_returns_df[['financial']]
etf_returns_financial_df = etf_returns_financial_df.rename(columns={"financial": "return"})
etf_returns_financial_df['ETF'] = 'financial'

etf_returns_energy_df = etf_returns_df[['energy']]
etf_returns_energy_df = etf_returns_energy_df.rename(columns={"energy": "return"})
etf_returns_energy_df['ETF'] = 'energy'

etf_returns_technology_df = etf_returns_df[['technology']]
etf_returns_technology_df = etf_returns_technology_df.rename(columns={"technology": "return"})
etf_returns_technology_df['ETF'] = 'technology'

# concatenate dataframes for the three ETFs.
all_etfs_df = pd.concat((etf_returns_financial_df, etf_returns_energy_df, etf_returns_technology_df))

# set a title for the plot, x-axis, and y-axis.
plt.title('Boxplot for comparison', fontsize=20) 

# prepare the boxplot.
sns.boxplot(x="ETF",y="return",data=all_etfs_df)

# show the plot.
plt.show()

png

BACK