-
Notifications
You must be signed in to change notification settings - Fork 1
/
0_single_video.py
190 lines (164 loc) · 7.67 KB
/
0_single_video.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
#!/usr/bin/env python3
import twitch # pip install python-twitch-client
import yaml # pip install PyYAML
from webvtt import WebVTT, Caption # pip install webvtt-py
from vosk import Model, KaldiRecognizer, SetLogLevel # pip install vosk
import os
import sys
import json
import subprocess
import shutil
from datetime import datetime
import utils
import time
# the vod which we wish to download (exactly one numeric vod id is expected)
if len(sys.argv) != 2:
    print("please pass exactly one vod id to download...")
    # sys.exit instead of the interactive-only `exit` helper from `site`
    sys.exit(-1)
try:
    vod_id_to_download = int(sys.argv[1])
except ValueError:
    # report a readable error instead of a traceback for a non-numeric id
    print("vod id must be an integer, got: " + sys.argv[1])
    sys.exit(-1)

# toggles for the optional post-processing steps further down
render_chat = True   # render the downloaded chat json into a video
transcribe = False   # run speech-to-text on the downloaded video
# authentication information
# the config file is expected at config/auth.yaml next to this script and
# must provide the "client_id" and "client_secret" keys read below
path_base = os.path.dirname(os.path.abspath(__file__))
auth_config = path_base + "/config/auth.yaml"
# safe_load only constructs plain python types, which is all that is read
# here, and never instantiates arbitrary objects from the yaml document
with open(auth_config, encoding="utf-8") as f:
    auth = yaml.safe_load(f)
client_id = auth["client_id"]
client_secret = auth["client_secret"]
# ================================================================
# ================================================================
# locations of the bundled third-party tools (relative to this script)
path_twitch_cli = (
    path_base + "/thirdparty/Twitch_Downloader_1.54.0/TwitchDownloaderCLI"
)
path_twitch_ffmpeg = (
    path_base + "/thirdparty/ffmpeg-4.3.1-amd64-static/ffmpeg"
)
path_model = path_base + "/thirdparty/vosk-model-small-en-us-0.15/"
# root of the exported data and the scratch folder for in-progress files
path_root = path_base + "/../data/"
path_temp = "/tmp/tvc_single_video/"
# ================================================================
# ================================================================
# setup control+c handler
utils.setup_signal_handle()

# create our twitch api python objects for query
client_helix = twitch.TwitchHelix(client_id=client_id, client_secret=client_secret)
client_helix.get_oauth()

# query the api for the metadata of the requested vod
print("trying to pull api info for vod " + str(vod_id_to_download))
videos = client_helix.get_videos(video_ids=[vod_id_to_download])
# explicit check instead of `assert`, which is stripped under `python -O`
if len(videos) != 1:
    print("expected exactly one video for vod " + str(vod_id_to_download)
          + " but the api returned " + str(len(videos)))
    sys.exit(-1)
# create the video object with all our information
video = {
    'helix': videos[0],
}

# DATA: api data of this vod (this dict is what gets written to *_info.json)
helix = video['helix']  # shorthand for the api record used below
video_data = {
    'id': helix['id'],
    'user_id': helix['user_id'],
    'user_name': helix['user_name'],
    'title': helix['title'],
    'duration': helix['duration'],
    'url': helix['url'],
    'views': helix['view_count'],
    'moments': utils.get_vod_moments(helix['id']),
    # normalise a missing (None) segment list to an empty list
    'muted_segments': (helix['muted_segments'] if helix['muted_segments'] is not None else []),
    # stored as a string so the dict can be serialised with json
    'recorded_at': helix['created_at'].strftime('%Y-%m-%dT%H:%M:%SZ'),
}
# make sure the per-user data folder and the temp folder exist
path_data = path_root + "/" + video_data['user_name'].lower() + "/"
# exist_ok avoids the check-then-create race of os.path.exists + makedirs
os.makedirs(path_data, exist_ok=True)
os.makedirs(path_temp, exist_ok=True)
print("saving into " + video_data['user_name'].lower() + " user folder")
# extract what folder we should save into ("<year>-<month>/" of the
# recording date, or "unknown/" if the date cannot be parsed)
try:
    date = datetime.strptime(video_data['recorded_at'], '%Y-%m-%dT%H:%M:%SZ')
except (ValueError, TypeError):
    # only parsing failures fall back; a bare except would also swallow
    # KeyboardInterrupt / SystemExit
    export_folder = "unknown/"
else:
    export_folder = format(date.year, '02') + "-" + format(date.month, '02') + "/"

# create the folder if it isn't created already
os.makedirs(path_data + export_folder, exist_ok=True)
# INFO: write the api data of this vod to disk unless a termination was
# requested or the info file is already present
file_path_info = "".join(
    [path_data, export_folder, str(video['helix']['id']), "_info.json"]
)
print("saving video info: " + file_path_info)
if not (utils.terminated_requested or os.path.exists(file_path_info)):
    with open(file_path_info, 'w', encoding="utf-8") as info_file:
        info_file.write(json.dumps(video_data, indent=4))
# VIDEO: download the vod unless a termination was requested or the file
# is already on disk
file_path = path_data + export_folder + str(video['helix']['id']) + ".mp4"
print("download video: " + file_path)
if not utils.terminated_requested and not os.path.exists(file_path):
    # pass an argument list without a shell so that paths containing spaces
    # or shell metacharacters reach the cli unchanged
    cmd = [
        path_twitch_cli, 'videodownload',
        '--id', str(video['helix']['id']),
        '--ffmpeg-path', path_twitch_ffmpeg,
        '--temp-path', path_temp,
        '--quality', '1080p60',
        '-o', file_path,
    ]
    print(" ".join(cmd))
    # the cli output is deliberately not silenced here
    returncode = subprocess.call(cmd)
    if returncode != 0:
        print("video download failed with exit code " + str(returncode))
# CHAT: download the chat log unless a termination was requested or the
# file is already on disk
file_path_chat = path_data + export_folder + str(video['helix']['id']) + "_chat.json"
file_path_chat_tmp = path_temp + str(video['helix']['id']) + "_chat.json"
print("download chat: " + file_path_chat)
if not utils.terminated_requested and not os.path.exists(file_path_chat):
    # argument list without a shell so paths are passed through verbatim
    cmd = [
        path_twitch_cli, 'chatdownload',
        '--id', str(video['helix']['id']),
        '--embed-images', '--chat-connections', '6',
        '--bttv', 'true', '--ffz', 'true', '--stv', 'true',
        '-o', file_path_chat_tmp,
    ]
    returncode = subprocess.call(cmd, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
    # the chat is written to the temp folder first and only moved into the
    # data folder after a successful run, so a failed download neither
    # crashes the move nor blocks a later retry with a broken file
    if returncode == 0 and os.path.exists(file_path_chat_tmp):
        shutil.move(file_path_chat_tmp, file_path_chat)
    else:
        print("chat download failed with exit code " + str(returncode))
# AUDIO-TO-TEXT: transcribe the downloaded video into a word-level webvtt
# file, unless disabled, terminated, the video is missing, or the
# transcript already exists
if transcribe:
    file_path_webvtt = path_data + export_folder + str(video['helix']['id']) + ".vtt"
    if not utils.terminated_requested and os.path.exists(file_path) and not os.path.exists(file_path_webvtt):
        print("transcribing: " + file_path_webvtt)
        t0 = time.time()

        # open the model
        SetLogLevel(-1)
        sample_rate = 16000
        model = Model(path_model)
        rec = KaldiRecognizer(model, sample_rate)
        rec.SetWords(True)

        # open ffmpeg pipe stream of the audio file (from video); ffmpeg
        # writes mono s16le samples at `sample_rate` to its stdout
        command = [path_twitch_ffmpeg, '-nostdin', '-loglevel', 'quiet', '-i', file_path,
                   '-ar', str(sample_rate), '-ac', '1', '-f', 's16le', '-']
        results = []
        # the context manager closes the pipe and waits for ffmpeg, so the
        # child process is always reaped
        with subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL) as process:
            # read fixed-size chunks until the pipe returns no more data
            for data in iter(lambda: process.stdout.read(4000), b""):
                if rec.AcceptWaveform(data):
                    results.append(rec.Result())
        results.append(rec.FinalResult())

        # convert to standard format: one caption per recognised word
        vtt = WebVTT()
        for res in results:
            words = json.loads(res).get('result')
            if not words:
                # this result carries no word list, nothing to add
                continue
            for word in words:
                start = utils.webvtt_time_string(word['start'])
                end = utils.webvtt_time_string(word['end'])
                vtt.captions.append(Caption(start, end, word['word']))
        vtt.save(file_path_webvtt)
        print("done in " + str(time.time() - t0) + " seconds")
# RENDER: render the downloaded chat json into a video, unless disabled,
# terminated, the chat json is missing, or the render already exists
if render_chat:
    file_path_chat = path_data + export_folder + str(video['helix']['id']) + "_chat.json"
    file_path_render = path_data + export_folder + str(video['helix']['id']) + "_chat.mp4"
    file_path_render_tmp = path_temp + str(video['helix']['id']) + "_chat.mp4"
    # terminated_requested is honoured here like in the other steps
    if not utils.terminated_requested and os.path.exists(file_path_chat) \
            and not os.path.exists(file_path_render):
        print("rendering chat: " + file_path_render)
        # argument list without a shell so paths are passed through verbatim
        cmd = [
            path_twitch_cli, 'chatrender',
            '-i', file_path_chat, '-o', file_path_render_tmp,
            '--ffmpeg-path', path_twitch_ffmpeg,
            '-h', '926', '-w', '274', '--update-rate', '0.1',
            '--framerate', '60', '--font-size', '15',
            '--bttv', 'true', '--ffz', 'true', '--stv', 'true',
            '--sub-messages', 'true', '--badges', 'true',
            '--sharpening', 'true', '--dispersion', 'true',
            '--temp-path', path_temp,
            # optional: '--background-color', '#111111', '--message-color', '#ffffff',
        ]
        returncode = subprocess.call(cmd, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
        # the render is written to the temp folder first and only moved into
        # the data folder after a successful run, so a failed render neither
        # crashes the move nor blocks a later retry with a broken file
        if returncode == 0 and os.path.exists(file_path_render_tmp):
            shutil.move(file_path_render_tmp, file_path_render)
        else:
            print("chat render failed with exit code " + str(returncode))