-
Notifications
You must be signed in to change notification settings - Fork 3
/
analyze_stats.py
130 lines (101 loc) · 3.41 KB
/
analyze_stats.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
from datetime import datetime, timezone
import click
import matplotlib.pyplot as plt
import pandas as pd
def _load_downloads():
    """Read ``data/stats.csv`` and return it indexed by UTC timestamps.

    The ``timestamp`` column becomes the index. After reading, the index is
    passed through ``pd.to_datetime`` with ``format="ISO8601"`` and
    ``utc=True`` so the returned frame always carries a timezone-aware
    (UTC) ``DatetimeIndex``.
    """
    downloads = pd.read_csv(
        "data/stats.csv",
        index_col="timestamp",
        parse_dates=True,
    )
    # Re-parse explicitly: presumably read_csv's own date inference does not
    # always yield a uniform UTC index for this file -- TODO confirm.
    utc_index = pd.to_datetime(downloads.index, format="ISO8601", utc=True)
    downloads.index = utc_index
    return downloads
def _load_firefox():
    """Read ``data/firefox-daily-users.csv`` as a one-column, UTC-indexed frame.

    The ``date`` column becomes the index and is localized to UTC. The data
    column is renamed to ``"Firefox DAU"``; assigning a one-element column
    list means the CSV must contain exactly one data column.
    """
    firefox = pd.read_csv(
        "data/firefox-daily-users.csv",
        index_col="date",
        parse_dates=True,
    ).tz_localize(tz=timezone.utc)
    firefox.columns = ["Firefox DAU"]
    return firefox
def _load_chrome():
    """Read ``data/chrome-weekly-users.csv`` as a one-column, UTC-indexed frame.

    The ``Date`` column becomes the index and is localized to UTC. The data
    column is renamed to ``"Chrome WAU"``; assigning a one-element column
    list means the CSV must contain exactly one data column.
    """
    chrome = pd.read_csv(
        "data/chrome-weekly-users.csv",
        index_col="Date",
        parse_dates=True,
    ).tz_localize(tz=timezone.utc)
    chrome.columns = ["Chrome WAU"]
    return chrome
def _load_android():
    """Read ``data/android/installed.csv`` as a one-column, UTC-indexed frame.

    The ``Date`` column becomes the index and is localized to UTC. The
    ``Notes`` column is discarded, and the remaining data column is renamed
    to ``"Android installed devices"`` (so exactly one column must remain
    after ``Notes`` is dropped).
    """
    android = (
        pd.read_csv(
            "data/android/installed.csv",
            index_col="Date",
            parse_dates=True,
        )
        .tz_localize(tz=timezone.utc)
        .drop(columns=["Notes"])
    )
    android.columns = ["Android installed devices"]
    return android
def _load_data():
    """Build one frame combining the download stats with the per-store data.

    The download stats are first averaged into daily buckets. The Chrome,
    Firefox and Android frames are then outer-joined onto it by index, in
    that order, so dates present in any source are kept.
    """
    combined = _load_downloads().resample("1D").mean()
    # Loaders are called lazily, one per merge, in a fixed order.
    for load_source in (_load_chrome, _load_firefox, _load_android):
        combined = combined.merge(
            load_source(),
            how="outer",
            left_index=True,
            right_index=True,
        )
    return combined
def test_load():
    """Smoke test: each per-source loader runs without raising."""
    for loader in (_load_downloads, _load_chrome, _load_firefox, _load_android):
        loader()
def test_load_all():
    """Smoke test: the merged dataset can be built without raising."""
    _load_data()
@click.command()
@click.option("--column", help="Plot only this column of the merged dataset.")
@click.option("--save", help="Write the figure to this path instead of showing it.")
@click.option(
    "--since",
    type=click.DateTime(["%Y-%m-%d"]),
    help="Drop all rows before this date (YYYY-MM-DD).",
)
@click.option(
    "--per-day",
    is_flag=True,
    help="Add a second subplot with the rolling 7-day increase.",
)
@click.option(
    "--resample",
    default="1D",
    help="Pandas offset alias used to resample the data before plotting.",
)
@click.option("--title", help="Title of the first subplot (default: 'Cumulative').")
def main(
    column: str = None,
    save: str = None,
    since: datetime = None,
    per_day: bool = False,
    resample: str = "1D",
    title: str = None,
):
    """Plot the merged download and user statistics.

    The data is resampled (mean per bucket), gaps are filled by time-based
    interpolation, and the result is drawn with matplotlib. The figure is
    either saved to a file (--save) or shown interactively.
    """
    n_plots = 2 if per_day else 1

    df = _load_data()
    df = df.resample(resample).mean()
    df = df.interpolate(method="time")  # fill in missing dates

    if column:
        if column not in df:
            # ClickException prints "Error: <message>" to stderr and makes the
            # command exit with status 1, without relying on the site-provided
            # ``exit()`` builtin.
            raise click.ClickException(
                f"No such column '{column}', try one of: {list(df.columns)}"
            )
        df = df[column]

    if since:
        # NOTE(review): click yields a naive datetime, and astimezone() treats
        # a naive value as local time, so the cut-off depends on the machine's
        # timezone -- confirm whether replace(tzinfo=timezone.utc) was meant.
        df = df.truncate(before=since.astimezone(timezone.utc))

    gridargs = dict(axis="both", linestyle="--", linewidth=1, alpha=0.4)

    plt.figure(figsize=(8, 2.5 * n_plots))

    # First subplot: the values themselves.
    ax1 = plt.subplot(n_plots, 1, 1)
    df.plot(ax=ax1)
    ax1.set_title(title if title is not None else "Cumulative")
    ax1.set_ylim(0)
    ax1.grid(True, which="major", **gridargs)
    ax1.legend()

    if n_plots >= 2:
        # Second subplot: change per week, as a rolling 7-day window.
        ax = plt.subplot(n_plots, 1, 2)
        df_w = df.diff()
        df_w = df_w[df_w > 0]  # keep positive changes only (filters the outlier)
        df_w = df_w.resample("1D").mean()
        df_w = df_w.rolling("7D").mean() * 7  # mean daily change scaled to a week
        df_w.plot(ax=ax)
        ax.set_title("Per week (rolling)")
        ax.set_ylim(0)
        ax.grid(True, which="major", **gridargs)
        ax.legend()

    plt.tight_layout()

    if save:
        plt.savefig(save)
    else:
        plt.show()
# Run the click command only when executed as a script, not on import.
if __name__ == "__main__":
    main()