-
Notifications
You must be signed in to change notification settings - Fork 0
/
augmentations.py
58 lines (47 loc) · 1.87 KB
/
augmentations.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
#!g1.1
from typing import Tuple, Union, List, Callable, Optional
import torch
from torch import distributions
import torchaudio
class AugsCreation:
    """Callable that applies one randomly chosen augmentation to a waveform.

    On every call one of four transforms is picked uniformly at random:

    0. identity (the input is returned as is);
    1. additive Gaussian noise with standard deviation 0.01, clamped to
       [-1, 1];
    2. volume scaling through ``torchaudio.transforms.Vol(.25)``;
    3. mixing with a random window of a random background-noise recording
       (see :meth:`add_rand_noise`).

    Attributes:
        background_noises: paths of the background-noise files.
        noises: waveforms loaded from those files, one tensor per path.
    """

    # Background-noise recordings used when no explicit list is supplied.
    DEFAULT_BACKGROUND_NOISES = (
        'speech_commands/_background_noise_/white_noise.wav',
        'speech_commands/_background_noise_/dude_miaowing.wav',
        'speech_commands/_background_noise_/doing_the_dishes.wav',
        'speech_commands/_background_noise_/exercise_bike.wav',
        'speech_commands/_background_noise_/pink_noise.wav',
        'speech_commands/_background_noise_/running_tap.wav',
    )

    def __init__(self, background_noises: Optional[List[str]] = None):
        """Load every background-noise recording into memory.

        Args:
            background_noises: paths of the noise files to load. ``None``
                (the default) selects ``DEFAULT_BACKGROUND_NOISES``.

        Raises:
            ValueError: if an explicitly passed list of paths is empty.
        """
        if background_noises is None:
            background_noises = self.DEFAULT_BACKGROUND_NOISES
        self.background_noises = list(background_noises)
        if not self.background_noises:
            raise ValueError('at least one background noise file is required')

        # torchaudio.load returns (waveform, sample_rate); only the waveform
        # is kept, with singleton dimensions squeezed away.
        self.noises = [
            torchaudio.load(path)[0].squeeze()
            for path in self.background_noises
        ]

    def add_rand_noise(self, audio: torch.Tensor,
                       noise_level: float = 1.0) -> torch.Tensor:
        """Mix ``audio`` with a random window of a random background noise.

        The noise is scaled by
        ``(norm(audio) / norm(noise)) * 10 ** (-noise_level / 20)``, so a
        larger ``noise_level`` (in dB) yields quieter noise. Note that
        ``norm(noise)`` is taken over the whole noise recording, not only
        over the window that is actually mixed in.

        Args:
            audio: waveform to augment; its last dimension is used as the
                number of samples. The tensor itself is not modified.
            noise_level: attenuation of the noise in dB (default 1.0).

        Returns:
            A new tensor holding ``audio`` plus the scaled noise window,
            clamped to [-1, 1].
        """
        # Index into the list that is actually sampled from.
        noise_idx = torch.randint(
            low=0, high=len(self.noises), size=(1,)).item()
        noise = self.noises[noise_idx]

        noise_energy = torch.norm(noise)
        if noise_energy.item() == 0:
            # A silent (or empty) recording would make alpha inf/NaN;
            # there is nothing to mix in, so only apply the clamp.
            return audio.clamp(-1, 1)
        audio_energy = torch.norm(audio)
        alpha = (audio_energy / noise_energy) * 10.0 ** (-noise_level / 20)

        audio_len = audio.size(-1)
        noise_len = noise.size(0)
        if noise_len < audio_len:
            # Tile a too-short recording so that a full-length window
            # exists; otherwise the addition below fails to broadcast.
            repeats = -(-audio_len // noise_len)  # ceiling division
            noise = noise.repeat(repeats)
            noise_len = noise.size(0)

        # `high` is exclusive, so every offset in [0, noise_len - audio_len]
        # can be drawn, including the window ending at the last sample.
        start = torch.randint(
            low=0,
            high=noise_len - audio_len + 1,
            size=(1,)
        ).item()
        noise_sample = noise[start:start + audio_len]

        audio_new = audio + alpha * noise_sample
        audio_new.clamp_(-1, 1)
        return audio_new

    def __call__(self, wav: torch.Tensor) -> torch.Tensor:
        """Apply one uniformly chosen augmentation to ``wav``.

        Args:
            wav: waveform tensor to augment.

        Returns:
            The augmented waveform. The identity branch returns ``wav``
            itself rather than a copy.
        """
        augs = [
            lambda x: x,
            lambda x: (x + distributions.Normal(0, 0.01).sample(x.size())).clamp_(-1, 1),
            lambda x: torchaudio.transforms.Vol(.25)(x),
            lambda x: self.add_rand_noise(x),
        ]
        # Draw from the real number of augmentations so the list and the
        # sampling range cannot drift apart.
        aug_num = torch.randint(low=0, high=len(augs), size=(1,)).item()
        return augs[aug_num](wav)