-
Notifications
You must be signed in to change notification settings - Fork 1
/
_local_transcriber.py
106 lines (80 loc) ยท 3.23 KB
/
_local_transcriber.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
# -*- coding: utf-8 -*-
#
# Copyright 2019 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# DO NOT EDIT! This is a generated sample ("LongRunningPromise", "speech_transcribe_async")
# To install the latest published package dependency, execute the following:
# pip install google-cloud-speech
# sample-metadata
# title: Transcribe Audio File using Long Running Operation (Local File) (LRO)
# description: Transcribe a long audio file using asynchronous speech recognition
# usage: python3 samples/v1/speech_transcribe_async.py [--local_file_path "resources/brooklyn_bridge.raw"]
# [START speech_transcribe_async]
# -*- coding:utf-8 -*-
from google.cloud.speech_v1 import enums
from google.cloud import speech_v1
import io
import os
# Point the Google client libraries at a service-account key file.
# setdefault preserves a credentials path already configured in the
# environment instead of silently clobbering it. NOTE(review): the
# hard-coded absolute path below is machine-specific — move it to
# deployment configuration rather than source.
os.environ.setdefault(
    "GOOGLE_APPLICATION_CREDENTIALS",
    r"/Users/noopy/ghoststation_transcript/credentials.json",
)
def sample_long_running_recognize(local_file_path):
    """Transcribe a long audio file using asynchronous speech recognition.

    Reads the whole file into memory, sends it to the Cloud
    Speech-to-Text ``LongRunningRecognize`` endpoint, blocks until the
    operation completes, and prints the most probable transcript for
    each recognition result.

    Args:
        local_file_path: Path to a local audio file, e.g. /path/audio.wav
    """
    client = speech_v1.SpeechClient()

    # Recognition settings: two-channel audio transcribed separately per
    # channel, Korean language model. Encoding and sample rate are left
    # unset here, so the service infers them from the file header —
    # fine for FLAC/WAV; raw PCM would need them set explicitly.
    config = {
        "audio_channel_count": 2,
        "enable_separate_recognition_per_channel": True,
        "language_code": "ko-KR",
    }

    # Inline the audio bytes in the request. This only works for files
    # under the API's inline-content size limit; larger files must be
    # referenced via a Cloud Storage URI instead.
    with io.open(local_file_path, "rb") as audio_file:
        audio = {"content": audio_file.read()}

    operation = client.long_running_recognize(config, audio)

    print(u"Waiting for operation to complete...")
    response = operation.result()

    for result in response.results:
        # First alternative is the most probable result
        alternative = result.alternatives[0]
        print(u"Transcript: {}".format(alternative.transcript))
# [END speech_transcribe_async]
def main():
    """Parse command-line options and run the transcription sample."""
    import argparse

    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument(
        "--local_file_path",
        type=str,
        default="transformed_flac/test.flac",
    )
    parsed = arg_parser.parse_args()
    sample_long_running_recognize(parsed.local_file_path)


if __name__ == "__main__":
    main()