-
Notifications
You must be signed in to change notification settings - Fork 55
/
main.py
92 lines (85 loc) · 2.05 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
import pandas
import sqlite3
conn = sqlite3.connect("v2ex.sqlite")
e = {
"top-comment": """
select topic_id, c.id, c.content, c.thank_count, c.no, t.title
from comment c
left join topic t on t.id = c.topic_id
order by c.thank_count desc
""",
"top-topic-by-thank_count": """
select id, title, thank_count
from topic
order by thank_count desc
""",
"top-topic-by-favorite_count": """
select id, title, favorite_count
from topic
order by favorite_count desc
""",
"top-topic-by-votes": """
select id, title, votes
from topic
order by votes desc
""",
"top-topic-by-clicks": """
select id, title, clicks
from topic
order by clicks desc
""",
"tag-usage-count": """
select t.value as tag, count(*) as count
from topic,
json_each(tag) as t
group by t.value
order by count desc
""",
"top-user-by-comment_count": """
select commenter as username, count(commenter) as comment_count
from comment
group by commenter
order by comment_count desc
""",
"top-user-by-topic_count": """
select author as username, count(author) as topic_count
from topic
group by author
order by topic_count desc
""",
}
e2 = {
"new-topic-every-month": """
SELECT strftime('%Y-%m', create_at, 'unixepoch') AS date, COUNT(*) AS topic_count
FROM topic
GROUP BY date
""",
"new-comment-every-month": """
SELECT strftime('%Y-%m', create_at, 'unixepoch') AS date, COUNT(*) AS comment_count
FROM comment
GROUP BY date
""",
"new-member-every-month": """
SELECT strftime('%Y-%m', create_at, 'unixepoch') AS date, COUNT(*) AS member_count
FROM member
GROUP BY date
""",
}
LIMIT = 20
def f(
sql: str,
export_name: str,
conn,
limit: int | None = LIMIT,
orient: str | None = "records",
):
ae = "" if limit is None else f" limit {limit}"
pandas.read_sql(f"{sql} {ae}", conn).to_json(
f"./analysis/v2ex-analysis/public/{export_name}.json",
force_ascii=False,
orient=orient,
)
for i, sql in e.items():
f(sql=sql, export_name=i, conn=conn)
for i, sql in e2.items():
f(sql=sql, export_name=i, conn=conn, limit=None, orient=None)