-
-
Notifications
You must be signed in to change notification settings - Fork 321
/
colab_utils.py
71 lines (64 loc) · 2.39 KB
/
colab_utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
#
# A modified version of the DDSP Colab helper script:
# https://github.com/magenta/ddsp/blob/master/ddsp/colab/colab_utils.py
# Adapted to be consistent with the rest of the examples code.
#
import io
import base64
import tempfile
from typing import Optional
from src.silero.utils import read_audio
from pydub import AudioSegment
from google.colab import files
from google.colab import output
from IPython import display as _display
def record_audio(seconds: int = 3,
                 normalize_db: float = 0.1):
    """Record audio from the browser microphone inside a Colab notebook.

    A small JavaScript helper is injected into the cell output; it captures
    the microphone with ``MediaRecorder`` for the requested duration and
    hands the recording back to Python as a base64 data URL.

    Args:
        seconds: Recording length in seconds.
        normalize_db: Headroom forwarded to ``audio_bytes_to_np``; ``None``
            skips the normalization step there.

    Returns:
        Whatever ``audio_bytes_to_np`` returns for the recorded bytes.
    """
    # Use Javascript to record audio.
    record_js_code = """
    const sleep = time => new Promise(resolve => setTimeout(resolve, time))
    const b2text = blob => new Promise(resolve => {
      const reader = new FileReader()
      reader.onloadend = e => resolve(e.srcElement.result)
      reader.readAsDataURL(blob)
    })
    var record = time => new Promise(async resolve => {
      stream = await navigator.mediaDevices.getUserMedia({ audio: true })
      recorder = new MediaRecorder(stream)
      chunks = []
      recorder.ondataavailable = e => chunks.push(e.data)
      recorder.start()
      await sleep(time)
      recorder.onstop = async ()=>{
        stream.getTracks().forEach(track => track.stop())
        blob = new Blob(chunks)
        text = await b2text(blob)
        resolve(text)
      }
      recorder.stop()
    })
    """
    # NOTE: the `stream.getTracks()...stop()` line above releases the
    # microphone once recording ends; stopping the MediaRecorder alone leaves
    # the capture stream (and the browser's "recording" indicator) active.
    print(f'Starting recording for {seconds} seconds...')
    _display.display(_display.Javascript(record_js_code))
    # The JS helper expects milliseconds; int() truncates like the `%d` format.
    duration_ms = int(seconds * 1000.0)
    audio_string = output.eval_js(f'record({duration_ms})')
    print('Finished recording!')
    # The result is a data URL ("data:<mime>;base64,<payload>"); keep only the
    # payload after the comma.
    audio_bytes = base64.b64decode(audio_string.split(',')[1])
    return audio_bytes_to_np(audio_bytes,
                             normalize_db=normalize_db)
def audio_bytes_to_np(wav_data: bytes,
                      normalize_db: float = 0.1):
    """Decode encoded audio bytes, clean them up and load via ``read_audio``.

    The bytes are parsed with pydub, the DC offset is removed, the signal is
    optionally peak-normalized, and the result is written to a temporary WAV
    file that is then loaded with ``read_audio``.

    Args:
        wav_data: Encoded audio in any container pydub can parse.
        normalize_db: Headroom passed to pydub's ``normalize``. Pass ``None``
            to skip normalization.

    Returns:
        Whatever ``read_audio`` returns for the temporary WAV file.
    """
    # Parse and normalize the audio.
    audio = AudioSegment.from_file(io.BytesIO(wav_data))
    # pydub segments are immutable: every effect returns a NEW segment, so the
    # result has to be rebound or the effect is silently discarded.
    audio = audio.remove_dc_offset()
    if normalize_db is not None:
        audio = audio.normalize(headroom=normalize_db)
    # Save to a tempfile and load it with read_audio. The load must happen
    # inside the `with` block because the file is deleted when it is closed.
    with tempfile.NamedTemporaryFile(suffix='.wav') as temp_wav_file:
        fname = temp_wav_file.name
        audio.export(fname, format='wav')
        wav = read_audio(fname)
    return wav
def upload_audio(normalize_db: Optional[float] = None):
    """Prompt for a file upload in Colab and load the first uploaded file.

    Args:
        normalize_db: When ``None`` (the default) the uploaded file is loaded
            directly with ``read_audio``. Otherwise the uploaded bytes are
            passed to ``audio_bytes_to_np`` with this value as its
            ``normalize_db`` argument.

    Returns:
        The loaded audio for the first uploaded file, or ``None`` when
        nothing was uploaded.
    """
    audio_files = files.upload()
    if not audio_files:
        return None
    first_name = next(iter(audio_files))
    if normalize_db is None:
        # Unchanged default path: read the file by its uploaded name.
        return read_audio(first_name)
    # Previously `normalize_db` was accepted but ignored; honour it by sending
    # the uploaded bytes through the same pipeline used for recordings.
    return audio_bytes_to_np(audio_files[first_name],
                             normalize_db=normalize_db)