-
Notifications
You must be signed in to change notification settings - Fork 0
/
prepare.py
82 lines (66 loc) · 2.56 KB
/
prepare.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter
import os
from pydub import AudioSegment
import magic
import soundfile as sf
import json
# Parse command line arguments
def _parse_bool(value):
    """Convert a command-line string such as "true" or "no" into a bool.

    Without an explicit converter argparse stores the raw string, and every
    non-empty string (including "False" and "0") is truthy, which made it
    impossible to switch the option off in the obvious way.

    Raises:
        argparse.ArgumentTypeError: if the text is not a recognised boolean
            spelling; argparse turns this into a normal usage error.
    """
    # Function-scope import so this block does not depend on the file-level
    # import line being extended.
    from argparse import ArgumentTypeError

    normalized = value.strip().lower()
    if normalized in ("1", "true", "t", "yes", "y", "on"):
        return True
    # The empty string is kept falsy, matching how it was treated before.
    if normalized in ("", "0", "false", "f", "no", "n", "off"):
        return False
    raise ArgumentTypeError(f"expected a boolean value, got {value!r}")


parser = ArgumentParser(formatter_class=ArgumentDefaultsHelpFormatter)
parser.add_argument(
    "-a", "--audiofile", required=True, help="path of the vocal audio file"
)
parser.add_argument(
    "-l", "--lyricsfile", required=True, help="path of the lyrics text file"
)
parser.add_argument(
    "-p",
    "--append_pipe",
    type=_parse_bool,
    # The default is already a bool, so argparse does not run it through
    # the converter.
    default=True,
    help="add a pipe character to the end of each lyric line",
)
# Expose the parsed options as a plain dict keyed by option name.
args = vars(parser.parse_args())
# MIME type -> short format name returned by detect_audio_format().
# The same container is known under several MIME names, so the common
# aliases are listed for each format.
# NOTE(review): which spelling libmagic reports may depend on its version -
# confirm against the deployed library.
_MIME_TO_FORMAT = {
    "audio/x-wav": "wav",
    "audio/wav": "wav",
    "audio/wave": "wav",
    "audio/vnd.wave": "wav",
    "audio/flac": "flac",
    "audio/x-flac": "flac",
    "audio/mpeg": "mp3",
    # Add more mime types as needed
}


def detect_audio_format(file_path):
    """Return the short audio format name ('wav', 'flac' or 'mp3') of a file.

    The format is derived from the MIME type that ``magic`` reports for the
    file.

    Args:
        file_path: Path of the audio file to inspect.

    Returns:
        One of ``'wav'``, ``'flac'`` or ``'mp3'``.

    Raises:
        ValueError: If the detected MIME type is not a supported audio format.
    """
    mime_type = magic.Magic(mime=True).from_file(file_path)
    try:
        return _MIME_TO_FORMAT[mime_type]
    except KeyError:
        # Suppress the KeyError context: callers only care that the format
        # is unsupported, not how the lookup is implemented.
        raise ValueError("Unsupported audio format") from None
# --- Audio preparation -------------------------------------------------------
# Load the input audio, reduce it to a single channel at no more than
# 16000 Hz, and export the result as a WAV file next to the input.
# NOTE(review): presumably the downstream consumer expects 16 kHz mono -
# confirm; inputs below 16000 Hz are deliberately left at their own rate.
print("Preparing audio...")
fullaudiopath = os.path.abspath(args["audiofile"])
# Named audio_format (not `format`) so the builtin format() is not shadowed.
audio_format = detect_audio_format(fullaudiopath)
print(f" * Input format is {audio_format}.")

# Load the audio file
sound = AudioSegment.from_file(fullaudiopath, format=audio_format)
RESAMPLED_FILE_NAME = f"{fullaudiopath}.prepared.wav"

# Downmix to mono whenever there is more than one channel. Checking only for
# exactly two channels would let multi-channel (e.g. surround) input through
# without being downmixed.
if sound.channels > 1:
    if sound.channels == 2:
        print(" * Input is stereo; need to downmix to mono.")
    else:
        print(f" * Input has {sound.channels} channels; need to downmix to mono.")
    sound = sound.set_channels(1)

# Resample to 16000 Hz if higher
if sound.frame_rate > 16000:
    print(f" * Input is {sound.frame_rate} Hz, need to resample to 16000 Hz")
    sound = sound.set_frame_rate(16000)

# Export the processed audio
print(f" * Exporting to {RESAMPLED_FILE_NAME}")
sound.export(RESAMPLED_FILE_NAME, format="wav")
# --- Lyrics cleaning ---------------------------------------------------------
# Read the lyrics file, drop punctuation and empty lines, optionally mark the
# end of each line with " |", and join everything into one string.

# Read with an explicit encoding so the result does not depend on the
# platform's locale; "utf-8-sig" reads plain UTF-8 and also discards a leading
# byte-order mark so it cannot leak into the first lyric line.
with open(args["lyricsfile"], encoding="utf-8-sig") as f:
    lines = f.readlines()

# Single-pass removal of the unwanted punctuation characters.
_PUNCTUATION_TABLE = str.maketrans("", "", ",.!?")

# Remove punctuation first and filter afterwards, so a line consisting only of
# punctuation is dropped instead of becoming an empty lyric entry.
cleaned_lines = []
for raw_line in lines:
    cleaned = raw_line.translate(_PUNCTUATION_TABLE).strip()
    if cleaned:
        cleaned_lines.append(cleaned)

# Append " |" to each valid line if append_pipe is True
if args["append_pipe"]:
    cleaned_lines = [line + " |" for line in cleaned_lines]

# Join the cleaned lines into a single string
cleaned_text = "\n".join(cleaned_lines)
# --- Manifest ----------------------------------------------------------------
# Write a one-line JSON manifest next to the input audio. It records the path
# of the prepared WAV file and the cleaned lyrics text.
manifest_filepath = fullaudiopath + ".manifest.json"
manifest_data = dict(
    audio_filepath=RESAMPLED_FILE_NAME,
    text=cleaned_text,
)

print(f"Writing manifest to {manifest_filepath}")
with open(manifest_filepath, "w") as manifest_file:
    # One JSON object followed by a newline.
    manifest_file.write(f"{json.dumps(manifest_data)}\n")