-
Notifications
You must be signed in to change notification settings - Fork 7
/
syslog-daemons-per-10-minutes.py
109 lines (57 loc) · 1.8 KB
/
syslog-daemons-per-10-minutes.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
#!/usr/bin/env python
# coding: utf-8
# # Syslog csv visualization
#
# This script uses pandas to generate:
#
# - Simple plot of a syslog.csv. The csv file has been previously created by [syslog-to-csv.py](https://github.com/gm3dmo/syslog-to-csv/blob/main/syslog-to-csv.py)
# - Summary and count of the daemons which wrote to syslog in csv and markdown format.
#
# In[ ]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import pathlib
cwd = pathlib.Path.cwd()
# In[ ]:
# Allow up to 1000 rows to be shown when a frame is displayed.
pd.options.display.max_rows = 1000
# The input is expected to sit in the directory the script is started from.
csv_file = cwd.joinpath("syslog.csv")
df = pd.read_csv(csv_file)
# In[1]:
# Print the column names, dtypes and non-null counts of the loaded frame.
df.info()
# Create a pandas datetime column
# In[2]:
# Convert the "unix_timestamp" column (interpreted as seconds since the epoch)
# into datetimes and store the result in a new "real_date" column.
epoch_seconds = df["unix_timestamp"]
df["real_date"] = pd.to_datetime(epoch_seconds, unit="s")
# Print the frame summary again so the new column and its dtype are visible.
df.info()
# Create the *buckets*. We've chosen `600s` (600 seconds, i.e. 10 minutes) for the granularity of the bucket. Other frequencies can be chosen and are documented in the [offset-aliases](https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases)
# In[4]:
# Lowercase "s" is the current spelling of the seconds alias; the uppercase
# "S" form is deprecated in recent pandas releases.
BUCKET_FREQ = "600s"
# Group the rows by (BUCKET_FREQ window of "real_date", "daemon"). The Grouper
# `axis` keyword is deprecated in recent pandas and 0 was its default, so it is
# simply omitted. The grouping is built once and reused for both results below.
rows_per_bucket = df.groupby(
    [pd.Grouper(key="real_date", freq=BUCKET_FREQ), "daemon"]
)
# Non-null count of every remaining column for each (bucket, daemon) pair.
buckets = rows_per_bucket.count()
# In[5]:
# Bare expression kept from the notebook export: it displays the frame in a
# notebook cell and has no effect when the file is run as a plain script.
buckets
# #### Which daemons are producing the most messages per 10 minutes?
# In[6]:
# Count the "wiped_line" values per (bucket, daemon), then move the daemon
# level into the columns: one row per bucket, one column per daemon.
buckets_of_wiped_line = rows_per_bucket["wiped_line"].count().unstack()
# In[14]:
# buckets_of_wiped_line.plot()
# In[8]:
# Legend settings: the title doubles as the chart description, and the anchor
# point (in axes coordinates) sits just past the right-hand edge of the axes.
legend_options = {
    "title": "Lines written to syslog by daemon per 10 minutes",
    "bbox_to_anchor": (1.05, 1),
}
# Draw the per-bucket counts held in buckets_of_wiped_line.
sns.lineplot(data=buckets_of_wiped_line)
# Tilt the x tick labels so they are less likely to overlap.
plt.xticks(rotation=45)
plt.legend(**legend_options)
# ### Generate summaries of the bucket data
# In[11]:
# Write the per-bucket, per-daemon counts as a markdown table. Paths are
# relative, so the files land in the current working directory.
buckets_of_wiped_line.to_markdown(buf="syslog-10-minute-breakdown.md")
# In[10]:
# Write the same table as CSV.
buckets_of_wiped_line.to_csv(path_or_buf="syslog-10-minute-breakdown.csv")
# In[ ]: