Skip to content

Commit

Permalink
🐕prepare for version 3.2.0
Browse files Browse the repository at this point in the history
🐕prepare for version 3.2.0
  • Loading branch information
iofu728 authored Aug 10, 2019
2 parents e094391 + d9fdbce commit 2d85598
Show file tree
Hide file tree
Showing 10 changed files with 593 additions and 273 deletions.
22 changes: 22 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -583,4 +583,26 @@ while (z++)
![image](https://cdn.nlark.com/yuque/0/2019/png/104214/1557240022438-bc891ec5-7bbc-412a-b4d4-f330608d21f0.png)
## OceanBall V2
check param list:
| param | Ctrip | Incognito | Node | !!import |
| ------------ | ----- | --------- | ---- | -------- |
| define || x | x |
| \_\_filename | x | x | x |
| module | x | x || x |
| process || x ||
| \_\_dirname || x | x |
| global | x | x || x |
| INT_MAX || x | x |
| require || x |||
| History || x |
| Location || x |
| Window || x |
| Document || x |
| window || x |
| navigator || x |
| history || x |
**----To be continued----**
107 changes: 73 additions & 34 deletions bilibili/analysis.py
Original file line number Diff line number Diff line change
@@ -1,39 +1,78 @@
'''
@Author: gunjianpan
@Date: 2019-04-04 10:57:24
@Last Modified by: gunjianpan
@Last Modified time: 2019-04-04 14:02:54
'''
# -*- coding: utf-8 -*-
# @Author: gunjianpan
# @Date: 2019-04-04 10:57:24
# @Last Modified by: gunjianpan
# @Last Modified time: 2019-08-10 14:47:13

import pandas as pd
import numpy as np
import time
import os
from util.util import time_stamp, echo, read_file

# Module-level script: runs at import time, reads '<data_dir>public.csv',
# derives per-row growth metrics and writes them to '<data_dir>public_re.csv'.
data_dir = 'bilibili/'
df = pd.read_csv('%spublic.csv' % data_dir)

'''one day'''
# NOTE(review): columns are addressed by their header strings; '3'/'4' look
# like two fan counts and '6'/'18' like two view counts -- confirm against
# whatever produces public.csv.
df['fan'] = df['3'].fillna(0)
# Keep only the part after the first whitespace run (the clock time of a
# 'YYYY-mm-dd HH:MM:SS' value).
df['time'] = df['1'].map(lambda x: x.split(None, 1)[1])
df['fanadd'] = df['4'] - df['3']
# Anything that is not strictly positive (negative, zero, NaN) becomes 0.
df['fanadd'] = df['fanadd'].map(lambda x: x if x > 0 else 0)
df['fanadd_ratio'] = df['fanadd'] / df['3']
# Division by zero yields +/-inf (or NaN for 0/0); all of them are mapped to 0.
df['fanadd_ratio'] = df['fanadd_ratio'].replace(
[np.inf, -np.inf], np.nan).fillna(0)
df['viewadd'] = (df['18'] - df['6']).fillna(0)
df['viewadd'] = df['viewadd'].map(lambda x: x if x > 0 else 0)
df['viewadd_ratio'] = (df['viewadd'] / df['6']).replace(
[np.inf, -np.inf], np.nan).fillna(0)
df['view_fan'] = (df['viewadd'] / df['3']).replace(
[np.inf, -np.inf], np.nan).fillna(0)
# Same ratio, but values of 20 or more are zeroed out.
df['view_fan_20'] = df['view_fan'].map(lambda x: x if x < 20 else 0)
df['view_fanadd'] = (df['viewadd'] / df['fanadd']).replace(
[np.inf, -np.inf], np.nan).fillna(0)

'''seven day'''
# Project every timestamp onto one fixed week in January 1970: the day of
# month is the weekday number from %w (0 = Sunday) plus 4, and the clock
# time is kept unchanged. The day is written without zero padding.
df['seven'] = df['1'].map(lambda x: '1970-01-%d %s' % (int(time.strftime(
"%w", time.strptime(x, "%Y-%m-%d %H:%M:%S"))) + 4, x.split(None, 1)[1]))
# Only the derived columns are exported, in this order.
need_columns = ['time', 'fan', 'fanadd', 'fanadd_ratio',
'viewadd', 'viewadd_ratio', 'view_fan', 'view_fan_20', 'view_fanadd', 'seven']
result_df = pd.DataFrame(df, columns=need_columns)
result_df.to_csv('%spublic_re.csv' % data_dir, index=False)
# Base folder for the bilibili data files; every path below keeps its
# trailing slash so file names can be appended directly.
data_dir = 'bilibili/data/'
# Folder the cleaned '<av_id>_new.csv' files are written to.
history_data_dir = data_dir + 'history_data/'
# Folder the raw '<av_id>.csv' history files are read from.
history_dir = data_dir + 'history/'


def _clamp_non_positive(series):
    ''' Return `series` with every value that is not > 0 (NaN included) set to 0. '''
    return series.where(series > 0, 0)


def _safe_ratio(numerator, denominator):
    ''' Element-wise division where inf, -inf and NaN results become 0. '''
    return (numerator / denominator).replace(
        [np.inf, -np.inf], np.nan).fillna(0)


def _to_reference_week(stamp):
    ''' Project a 'YYYY-mm-dd HH:MM:SS' string onto a fixed week of January 1970.

    The day of month is the weekday number from %w (0 = Sunday) plus 4 and is
    written without zero padding; the clock time is kept unchanged.
    '''
    weekday = int(time.strftime(
        '%w', time.strptime(stamp, '%Y-%m-%d %H:%M:%S')))
    return '1970-01-%d %s' % (weekday + 4, stamp.split(None, 1)[1])


def analysis_csv(data_dir='bilibili/'):
    ''' Derive growth metrics from `public.csv` and write `public_re.csv`.

    Reads `<data_dir>/public.csv`, whose columns are addressed by their header
    strings ('1', '3', '4', '6', '18'), computes the columns listed in
    `need_columns` and writes only those to `<data_dir>/public_re.csv`.

    NOTE(review): '3'/'4' look like fan counts and '6'/'18' like view counts;
    confirm against the producer of public.csv.

    @param data_dir: folder holding the input and receiving the output. The
        default keeps the previously hard-coded location; a trailing slash
        is optional.
    '''
    df = pd.read_csv(os.path.join(data_dir, 'public.csv'))

    # one day
    df['fan'] = df['3'].fillna(0)
    # Clock time only: everything after the first whitespace run.
    df['time'] = df['1'].map(lambda stamp: stamp.split(None, 1)[1])
    df['fanadd'] = _clamp_non_positive(df['4'] - df['3'])
    df['fanadd_ratio'] = _safe_ratio(df['fanadd'], df['3'])
    df['viewadd'] = _clamp_non_positive(df['18'] - df['6'])
    df['viewadd_ratio'] = _safe_ratio(df['viewadd'], df['6'])
    df['view_fan'] = _safe_ratio(df['viewadd'], df['3'])
    # Same ratio, but values of 20 or more are zeroed out.
    df['view_fan_20'] = df['view_fan'].where(df['view_fan'] < 20, 0)
    df['view_fanadd'] = _safe_ratio(df['viewadd'], df['fanadd'])

    # seven day
    df['seven'] = df['1'].map(_to_reference_week)

    need_columns = ['time', 'fan', 'fanadd', 'fanadd_ratio',
                    'viewadd', 'viewadd_ratio', 'view_fan', 'view_fan_20',
                    'view_fanadd', 'seven']
    df[need_columns].to_csv(
        os.path.join(data_dir, 'public_re.csv'), index=False)


def clean_csv(av_id: int):
    ''' Clean one history csv and write the result next to the raw data.

    Reads `<history_dir>/<av_id>.csv` through `read_file`, keeps the first
    line unconditionally and then walks the remaining lines, comparing each
    one's first two comma-separated fields (a timestamp and a view count)
    with the last kept line:

    * a line whose view count dropped, or rose by more than 5000, is skipped;
    * when the time gap exceeds 150, `(gap - 30) // 120` blank placeholder
      rows are inserted before the line;
    * the line itself is kept (and becomes the new reference) only when the
      absolute time gap exceeds 90.

    The kept lines are written to `<history_data_dir>/<av_id>_new.csv`.
    An empty input produces no output file, and blank lines are ignored.

    NOTE(review): assumes `read_file` returns a list of lines and
    `time_stamp` returns a number (presumably seconds) -- confirm in
    util.util.

    @param av_id: id used to build the input and output file names.
    '''
    csv_path = os.path.join(history_dir, '{}.csv'.format(av_id))
    output_path = os.path.join(history_data_dir, '{}_new.csv'.format(av_id))
    csv = read_file(csv_path)
    if not csv:
        # Nothing to clean; previously this crashed with IndexError on csv[0].
        return

    first_line = csv[0]
    last_time, last_view = first_line.split(',')[:2]
    last_time = time_stamp(last_time)
    last_view = int(last_view)
    result = [first_line]
    # Placeholder row: one more blank field than the first line has columns.
    empty_line = ','.join([' '] * (len(first_line.split(',')) + 1))

    for line in csv[1:]:
        if not line.strip():
            # A blank line has no fields to unpack; skip it instead of crashing.
            continue
        now_time, now_view = line.split(',')[:2]
        now_time = time_stamp(now_time)
        now_view = int(now_view)
        time_gap = now_time - last_time

        # Drop rows whose view count went backwards or jumped implausibly.
        if now_view < last_view or now_view - last_view > 5000:
            continue
        if abs(time_gap) > 150:
            # Pad the hole with placeholder rows; a negative gap gives a
            # non-positive count, so nothing is added in that case.
            result.extend([empty_line] * int((time_gap - 30) // 120))
        if abs(time_gap) > 90:
            result.append(line)
            last_view, last_time = now_view, now_time

    with open(output_path, 'w') as f:
        f.write('\n'.join(result))
32 changes: 18 additions & 14 deletions bilibili/assign_up.ini.tmp
Original file line number Diff line number Diff line change
@@ -1,19 +1,23 @@
[basic]
basic_av_id:21061574
basic_av_p:-1
up_name:还有一天就放假了
up_mid:
rank_id:119
tid:126
view_abnormal:1000
basic_av_id = 21061574
basic_av_p = -1
up_name = 还有一天就放假了
up_mid =
rank_id = 119
tid = 126
view_abnormal = 1000
history_check_list = 1,3,6
;split by ','

[assign]
av_ids:21061574,11624347
av_ids = 21061574,11624347
;split by ','

[comment]
keyword:死全家|草泥马|.{0,4}\$\$_.{0,4}
keyword = 死全家|草泥马|.{0,4}\$\$_.{0,4}
;supports regular expressions; separate alternatives with '|'
ignore_list:^[2-3].*
ignore_floor:{"21061574":["242-2"],"21062574":["1242"],"21061577":["1284"]}
ignore_start:0.5
ignore_end:8.5
email_limit:5
ignore_list = ^[2-3].*
ignore_rpid = {"21061574":["242-2"],"21062574":["1242"],"21061577":["1284"]}
ignore_start = 0.5
ignore_end = 8.5
email_limit = 5
Loading

0 comments on commit 2d85598

Please sign in to comment.