-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.py
84 lines (72 loc) · 2.35 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
import requests
import _sqlite3
import time
def req(comment_id, max_id, max_id_type, timeout=10):
    """Request one page from the m.weibo.cn ``comments/hotflow`` endpoint.

    Args:
        comment_id: ID of the post; sent as both the ``id`` and ``mid``
            query parameters.
        max_id: Paging cursor. An empty string requests the first page, in
            which case the ``max_id`` parameter is left out of the query.
        max_id_type: Value forwarded as the ``max_id_type`` query parameter.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot block the caller forever.

    Returns:
        The ``requests.Response`` of the GET request; decoding the body is
        left to the caller.

    Raises:
        requests.exceptions.RequestException: if the connection fails or
            the timeout expires.
    """
    # Build the query in the same order the URL always used:
    # id, mid, [max_id], max_id_type.
    params = {'id': comment_id, 'mid': comment_id}
    if max_id != '':
        params['max_id'] = max_id
    params['max_id_type'] = max_id_type
    # add your cookie here
    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36',
        # 'cookie': ''
    }
    return requests.get(
        'https://m.weibo.cn/comments/hotflow',
        params=params,
        headers=headers,
        timeout=timeout,
    )
def _save_comments(cursor, comments):
    """Write one page of comment dicts into the ``comments`` table.

    Rows are keyed by comment id, so a comment that is seen again replaces
    its earlier row instead of raising a uniqueness error.
    """
    for comment in comments:
        cursor.execute('''
            INSERT OR REPLACE INTO comments VALUES (?,?,?,?,?)
        ''', (
            comment['id'],
            comment['created_at'],
            comment['user']['screen_name'],
            comment['user']['id'],
            comment['text']
        ))


# replace the weibo id you want to get comment of
weibo_id = 1111222233334444
db_name = str(weibo_id) + '_comments.db'
# NOTE(review): `_sqlite3` is the private C extension behind the public
# `sqlite3` module; importing `sqlite3` at the top of the file would be the
# supported spelling.
conn = _sqlite3.connect(db_name)
try:
    c = conn.cursor()
    c.execute('''
        CREATE TABLE IF NOT EXISTS comments(
            id TEXT PRIMARY KEY,
            created_time TEXT,
            screen_name TEXT,
            user_id TEXT,
            content TEXT
        )
    ''')

    retry_count = 0
    max_retries = 5

    # First page: an empty max_id makes req() leave the cursor out of the URL.
    data = req(weibo_id, max_id='', max_id_type=0).json()
    comments = data['data']['data']
    max_id = data['data']['max_id']
    # Carry the server-provided cursor type into the next request; fall back
    # to 0 (the value previously always sent) if the field is absent.
    max_id_type = data['data'].get('max_id_type', 0)
    max_page = data['data']['max']
    total_num = data['data']['total_number']
    count = 1
    print(f'total {max_page} pages,{total_num} comments')
    print(f'the first page has {len(comments)} comments')
    # Persist the first page too; previously it was only counted and printed.
    _save_comments(c, comments)
    conn.commit()

    # A max_id of 0 is treated as "no further pages", so do not request
    # again once it is reached (including straight after the first page).
    while max_id != 0 and retry_count < max_retries:
        # Pause before every request, both after a success and after an error.
        time.sleep(5)
        try:
            content = req(weibo_id, max_id, max_id_type).json()
            if content['ok'] == 0:
                break
            max_id = content['data']['max_id']
            max_id_type = content['data']['max_id_type']
            content = content['data']['data']
            count += 1
            print(f'page {count} has {len(content)} comments')
            _save_comments(c, content)
            conn.commit()
            # Only consecutive failures count towards max_retries.
            retry_count = 0
        except Exception as e:
            retry_count += 1
            print(f'an error occurred:{e},retrying...')

    if retry_count >= max_retries:
        print(f'giving up after {max_retries} consecutive errors')
finally:
    # Close the database even if a request or insert raised.
    conn.close()