forked from Kedreamix/Linly-Dubbing
-
Notifications
You must be signed in to change notification settings - Fork 0
/
webui.py
205 lines (188 loc) · 9.27 KB
/
webui.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
import gradio as gr
from tools.step000_video_downloader import download_from_url
from tools.step010_demucs_vr import separate_all_audio_under_folder
from tools.step020_asr import transcribe_all_audio_under_folder
from tools.step030_translation import translate_all_transcript_under_folder
from tools.step040_tts import generate_all_wavs_under_folder
from tools.step050_synthesize_video import synthesize_all_video_under_folder
from tools.do_everything import do_everything
from tools.utils import SUPPORT_VOICE
# One-click automation tab: a single form wired to do_everything.
# Gradio passes the component values to fn positionally, in the order listed,
# so this list must stay aligned with do_everything's parameter order.
full_auto_interface = gr.Interface(
    fn=do_everything,
    inputs=[
        # Output folder, source URL, download count and download resolution
        # (same controls as the download tab, with folder and URL swapped).
        gr.Textbox(label='视频输出文件夹', value='videos'),
        gr.Textbox(
            label='视频URL',
            placeholder='请输入Youtube或Bilibili的视频、播放列表或频道的URL',
            value='https://www.bilibili.com/video/BV1kr421M7vz/',
        ),
        gr.Slider(minimum=1, maximum=100, step=1, label='下载视频数量', value=5),
        gr.Radio(['4320p', '2160p', '1440p', '1080p', '720p', '480p', '360p', '240p', '144p'], label='分辨率', value='1080p'),
        # Separation model, compute device and shift count
        # (same controls as the vocal-separation tab).
        gr.Radio(['htdemucs', 'htdemucs_ft', 'htdemucs_6s', 'hdemucs_mmi', 'mdx', 'mdx_extra', 'mdx_q', 'mdx_extra_q', 'SIG'], label='模型', value='htdemucs_ft'),
        gr.Radio(['auto', 'cuda', 'cpu'], label='计算设备', value='auto'),
        gr.Slider(minimum=0, maximum=10, step=1, label='移位次数 Number of shifts', value=5),
        # Speech-recognition controls (same as the ASR tab, minus the device).
        gr.Dropdown(['WhisperX', 'FunASR'], label='ASR模型选择', value='WhisperX'),
        gr.Radio(['large', 'medium', 'small', 'base', 'tiny'], label='WhisperX模型大小', value='large'),
        gr.Slider(minimum=1, maximum=128, step=1, label='批处理大小 Batch Size', value=32),
        gr.Checkbox(label='分离多个说话人', value=True),
        gr.Radio([None, 1, 2, 3, 4, 5, 6, 7, 8, 9], label='最小说话人数', value=None),
        gr.Radio([None, 1, 2, 3, 4, 5, 6, 7, 8, 9], label='最大说话人数', value=None),
        # Translation method and translation target language.
        gr.Dropdown(['OpenAI', 'LLM', 'Google Translate', 'Bing Translate', 'Ernie'], label='翻译方式', value='LLM'),
        gr.Dropdown(['简体中文', '繁体中文', 'English', 'Cantonese', 'Japanese', 'Korean'], label='目标语言', value='简体中文'),
        # TTS method, TTS target language and EdgeTTS voice.
        gr.Dropdown(['xtts', 'cosyvoice', 'EdgeTTS'], label='AI语音生成方法', value='xtts'),
        gr.Dropdown(['中文', 'English', '粤语', 'Japanese', 'Korean', 'Spanish', 'French'], label='目标语言', value='中文'),
        gr.Dropdown(SUPPORT_VOICE, value='zh-CN-XiaoxiaoNeural', label='EdgeTTS声音选择'),
        # Video-synthesis controls (same as the video-synthesis tab).
        gr.Checkbox(label='添加字幕', value=True),
        gr.Slider(minimum=0.5, maximum=2, step=0.05, label='加速倍数', value=1.00),
        gr.Slider(minimum=1, maximum=60, step=1, label='帧率', value=30),
        # type='filepath' matches the background-music input of
        # synthesize_video_interface, so fn receives a file path instead of
        # Gradio's default (sample_rate, ndarray) tuple.
        # NOTE(review): presumably do_everything forwards this value to the same
        # synthesis step -- confirm against tools.do_everything.
        gr.Audio(label='背景音乐', sources=['upload'], type='filepath'),
        gr.Slider(minimum=0, maximum=1, step=0.05, label='背景音乐音量', value=0.5),
        gr.Slider(minimum=0, maximum=1, step=0.05, label='视频音量', value=1.0),
        gr.Radio(['4320p', '2160p', '1440p', '1080p', '720p', '480p', '360p', '240p', '144p'], label='分辨率', value='1080p'),
        # Worker and retry limits; these have no counterpart in the other tabs.
        gr.Slider(minimum=1, maximum=100, step=1, label='Max Workers', value=1),
        gr.Slider(minimum=1, maximum=10, step=1, label='Max Retries', value=3),
    ],
    # fn must return one value per output: a status text and a video.
    outputs=[gr.Text(label='合成状态'), gr.Video(label='合成视频样例结果')],
    allow_flagging='never',
)
# Video download tab, wired to download_from_url.
# Input order here is URL first, then output folder, resolution and count;
# Gradio hands the values to fn positionally in exactly this order.
download_interface = gr.Interface(
    fn=download_from_url,
    inputs=[
        gr.Textbox(
            value='https://www.bilibili.com/video/BV1kr421M7vz/',
            placeholder='请输入Youtube或Bilibili的视频、播放列表或频道的URL',
            label='视频URL',
        ),
        gr.Textbox(value='videos', label='视频输出文件夹'),
        gr.Radio(
            choices=['4320p', '2160p', '1440p', '1080p', '720p', '480p', '360p', '240p', '144p'],
            value='1080p',
            label='分辨率',
        ),
        gr.Slider(label='下载视频数量', minimum=1, maximum=100, step=1, value=5),
        # A "single video" checkbox was listed here in the original but is disabled.
    ],
    # Three outputs: a status text, a sample video and a JSON info payload.
    outputs=[
        gr.Textbox(label='下载状态'),
        gr.Video(label='示例视频'),
        gr.Json(label='下载信息'),
    ],
    allow_flagging='never',
)
# Vocal separation tab, wired to separate_all_audio_under_folder.
# Values reach fn positionally: folder, model, device, progress flag, shifts.
demucs_interface = gr.Interface(
    fn=separate_all_audio_under_folder,
    inputs=[
        gr.Textbox(value='videos', label='视频文件夹'),
        gr.Radio(
            choices=['htdemucs', 'htdemucs_ft', 'htdemucs_6s', 'hdemucs_mmi', 'mdx', 'mdx_extra', 'mdx_q', 'mdx_extra_q', 'SIG'],
            value='htdemucs_ft',
            label='模型',
        ),
        gr.Radio(choices=['auto', 'cuda', 'cpu'], value='auto', label='计算设备'),
        gr.Checkbox(value=True, label='显示进度条'),
        gr.Slider(label='移位次数 Number of shifts', minimum=0, maximum=10, step=1, value=5),
    ],
    # Three outputs: a status text plus two audio players (vocals, accompaniment).
    outputs=[
        gr.Text(label='分离结果状态'),
        gr.Audio(label='人声音频'),
        gr.Audio(label='伴奏音频'),
    ],
    allow_flagging='never',
)
# Speech recognition (ASR) tab, wired to transcribe_all_audio_under_folder.
# Values reach fn positionally in the order the components are listed.
asr_inference = gr.Interface(
    fn=transcribe_all_audio_under_folder,
    inputs=[
        gr.Textbox(value='videos', label='视频文件夹'),
        gr.Dropdown(choices=['WhisperX', 'FunASR'], value='WhisperX', label='ASR模型选择'),
        gr.Radio(
            choices=['large', 'medium', 'small', 'base', 'tiny'],
            value='large',
            label='WhisperX模型大小',
        ),
        gr.Radio(choices=['auto', 'cuda', 'cpu'], value='auto', label='计算设备'),
        gr.Slider(label='批处理大小 Batch Size', minimum=1, maximum=128, step=1, value=32),
        gr.Checkbox(value=True, label='分离多个说话人'),
        # Minimum / maximum speaker count; None is an explicit, default choice.
        gr.Radio(choices=[None, 1, 2, 3, 4, 5, 6, 7, 8, 9], value=None, label='最小说话人数'),
        gr.Radio(choices=[None, 1, 2, 3, 4, 5, 6, 7, 8, 9], value=None, label='最大说话人数'),
    ],
    # Two outputs: a status text and a JSON view of the recognition result.
    outputs=[
        gr.Text(label='语音识别状态'),
        gr.Json(label='识别结果详情'),
    ],
    allow_flagging='never',
)
# Subtitle translation tab, wired to translate_all_transcript_under_folder.
# Values reach fn positionally: folder, translation method, target language.
translation_interface = gr.Interface(
    fn=translate_all_transcript_under_folder,
    inputs=[
        gr.Textbox(value='videos', label='视频文件夹'),
        gr.Dropdown(
            choices=['OpenAI', 'LLM', 'Google Translate', 'Bing Translate', 'Ernie'],
            value='LLM',
            label='翻译方式',
        ),
        gr.Dropdown(
            choices=['简体中文', '繁体中文', 'English', 'Cantonese', 'Japanese', 'Korean'],
            value='简体中文',
            label='目标语言',
        ),
    ],
    # Three outputs: a status text and two JSON panels (summary, translation).
    outputs=[
        gr.Text(label='翻译状态'),
        gr.Json(label='总结结果'),
        gr.Json(label='翻译结果'),
    ],
    allow_flagging='never',
)
# Text-to-speech tab, wired to generate_all_wavs_under_folder.
# Values reach fn positionally: folder, TTS method, target language, voice.
tts_interface = gr.Interface(
    fn=generate_all_wavs_under_folder,
    inputs=[
        gr.Textbox(value='videos', label='视频文件夹'),
        gr.Dropdown(choices=['xtts', 'cosyvoice', 'EdgeTTS'], value='xtts', label='AI语音生成方法'),
        gr.Dropdown(
            choices=['中文', 'English', '粤语', 'Japanese', 'Korean', 'Spanish', 'French'],
            value='中文',
            label='目标语言',
        ),
        # Voice choices come from SUPPORT_VOICE in tools.utils.
        gr.Dropdown(choices=SUPPORT_VOICE, label='EdgeTTS声音选择', value='zh-CN-XiaoxiaoNeural'),
    ],
    # Three outputs: a status text plus two audio players (synthesized, original).
    outputs=[
        gr.Text(label='合成状态'),
        gr.Audio(label='合成语音'),
        gr.Audio(label='原始音频'),
    ],
    allow_flagging='never',
)
# Video synthesis tab, wired to synthesize_all_video_under_folder.
# Values reach fn positionally in the order the components are listed.
synthesize_video_interface = gr.Interface(
    fn=synthesize_all_video_under_folder,
    inputs=[
        gr.Textbox(value='videos', label='视频文件夹'),
        gr.Checkbox(value=True, label='添加字幕'),
        gr.Slider(label='加速倍数', minimum=0.5, maximum=2, step=0.05, value=1.00),
        gr.Slider(label='帧率', minimum=1, maximum=60, step=1, value=30),
        # Upload-only audio input; type='filepath' makes Gradio pass a file path.
        gr.Audio(type='filepath', sources=['upload'], label='背景音乐'),
        gr.Slider(label='背景音乐音量', minimum=0, maximum=1, step=0.05, value=0.5),
        gr.Slider(label='视频音量', minimum=0, maximum=1, step=0.05, value=1.0),
        gr.Radio(
            choices=['4320p', '2160p', '1440p', '1080p', '720p', '480p', '360p', '240p', '144p'],
            value='1080p',
            label='分辨率',
        ),
    ],
    # Two outputs: a status text and the synthesized video.
    outputs=[
        gr.Text(label='合成状态'),
        gr.Video(label='合成视频'),
    ],
    allow_flagging='never',
)
# Linly-Talker lip-sync tab. This is a placeholder: no processing function is
# wired in yet, the tab only shows an "under construction" notice.
linly_talker_interface = gr.Interface(
    # Gradio calls fn with one positional argument per input component and
    # expects one return value per output component. The placeholder therefore
    # accepts any arguments and returns three no-op updates, which leaves the
    # notice and the empty status/video outputs as they are instead of raising
    # on submit.
    fn=lambda *_inputs: (gr.update(), gr.update(), gr.update()),
    inputs=[
        gr.Textbox(label='视频文件夹', value='videos'),
        gr.Dropdown(['Wav2Lip', 'Wav2Lipv2','SadTalker'], label='AI配音方式', value='Wav2Lip'),
    ],
    outputs=[
        gr.Markdown(value="施工中,请静候佳音 可参考 [https://github.com/Kedreamix/Linly-Talker](https://github.com/Kedreamix/Linly-Talker)"),
        gr.Text(label='合成状态'),
        gr.Video(label='合成视频'),
    ],
    # Flagging is disabled on every other tab in this file; keep this one consistent.
    allow_flagging='never',
)
# Theme applied to the tabbed application below.
my_theme = gr.themes.Soft()

# Top-level application: one tab per interface defined above.
# tab_names is positional with interface_list, so both lists must keep the
# same length and order.
app = gr.TabbedInterface(
    interface_list=[
        full_auto_interface,
        download_interface,
        demucs_interface,
        asr_inference,
        translation_interface,
        tts_interface,
        synthesize_video_interface,
        linly_talker_interface,
    ],
    tab_names=[
        '一键自动化 One-Click',
        '自动下载视频 ',
        '人声分离',
        'AI智能语音识别',
        '字幕翻译',
        'AI语音合成',
        '视频合成',
        'Linly-Talker 对口型(开发中)',
    ],
    title='智能视频多语言AI配音/翻译工具 - Linly-Dubbing',
    theme=my_theme,
)
# Start the web UI only when this file is executed as a script.
if __name__ == '__main__':
    app.launch(
        server_name="127.0.0.1",  # listen on the loopback address
        server_port=6006,
        share=True,      # additionally request a public Gradio share link
        inbrowser=True,  # open the UI in the default browser on startup
    )