Significant-Gravitas · swooshcode · May 6, 2023 · May 6, 2023 · May 6, 2023 · May 6, 2023
diff --git a/.DS_Store b/.DS_Store
diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS
@@ -6,4 +6,5 @@
 /src/autogpt_plugins/bing_search @ForestLinSen
 /src/autogpt_plugins/news_search @PalAditya
 /src/autogpt_plugins/wikipedia_search @pierluigi-failla
-/src/autogpt_plugins/random_values @sidewaysthought
+/src/autogpt_plugins/random_values @sidewaysthought
+/src/autogpt_plugins/speech_to_text @swooshcode
diff --git a/src/.DS_Store b/src/.DS_Store
diff --git a/src/autogpt_plugins/.DS_Store b/src/autogpt_plugins/.DS_Store
diff --git a/src/autogpt_plugins/speech_to_text/.DS_Store b/src/autogpt_plugins/speech_to_text/.DS_Store
diff --git a/src/autogpt_plugins/speech_to_text/README.md b/src/autogpt_plugins/speech_to_text/README.md
@@ -0,0 +1,65 @@
+# Changes: 
+This repository contains various plugins developed for use with the AutoGPT model. These plugins extend the functionality of AutoGPT by providing additional features, such as speech-to-text transcription, integration with external APIs, and more.
+
+## Table of Contents
+
+1. [Speech-to-Text Plugin](#speech-to-text-plugin)
+2. [Installation](#installation)
+3. [Contributing](#contributing)
+4. [License](#license)
+
+## Speech-to-Text Plugin
+
+The speech-to-text plugin allows users to transcribe spoken input in real time and feed the transcribed text into the AutoGPT model for processing. This plugin uses the Google Cloud Speech-to-Text API for transcription and PyAudio for real-time audio recording from the user's microphone.
+
+### Features
+
+- Real-time audio recording from the user's microphone
+- Transcription of spoken input using Google Cloud Speech-to-Text API
+- Integration with the AutoGPT model for processing transcribed text
+
+### Usage
+
+1. Set up the Google Cloud Speech-to-Text API and obtain your API credentials as a JSON file.
+2. Update the `speech_to_text_plugin.py` file to use the correct path to your API credentials.
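+3. Install the required dependencies: `pip install "google-cloud-speech<2.0" pyaudio` (the plugin imports the `enums` helper module, which was removed in google-cloud-speech 2.0).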
+4. Run the `speech_to_text_plugin.py` file to start recording and transcribing audio input.
+
+## Installation
+
+To install the plugins, follow these steps:
+
+1. Clone this repository: `git clone https://github.com/Frame-Tech-Solutions-Ltd-Co/Auto-GPT-Plugins.git`
+2. Navigate to the `src/autogpt_plugins` directory.
+3. Install the required dependencies for each plugin as specified in their respective README files or source code comments.
+
+## Contributing
+
+- Nigel Phillips a.k.a. Swooshcode
+- Software Developer
+- Founder of Frame Tech Solutions Ltd., Co. 框架技術解決方案
+- For inquiries: <https://tinyurl.com/nigelphillips>
+
+## License
+
+MIT License
+
+Copyright (c) 2023 Toran Bruce Richards
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/src/autogpt_plugins/speech_to_text/__init__.py b/src/autogpt_plugins/speech_to_text/__init__.py
@@ -0,0 +1,34 @@
+from typing import Any, Dict, List, Optional, Tuple, TypedDict, TypeVar
+
+from auto_gpt_plugin_template import AutoGPTPluginTemplate
+from .speech_to_text_plugin import transcribe_audio
+
+PromptGenerator = TypeVar("PromptGenerator")
+
+class SpeechToTextPlugin(AutoGPTPluginTemplate):
+    """Auto-GPT plugin that registers a speech-to-text command on the prompt.
+
+    NOTE(review): the command handler ``transcribe_audio`` is taken from the
+    package-relative import at the top of this module; confirm that
+    ``speech_to_text_plugin`` really exports a callable with that name and
+    that it accepts an ``audio`` keyword argument.
+    """
+
+    def __init__(self):
+        """Initialise the base template and set the plugin metadata."""
+        super().__init__()
+        self._name = "Auto-GPT-Speech-to-Text-Plugin"
+        self._version = "0.0.1"
+        self._description = "Auto-GPT Speech-to-Text Plugin: Transcribe spoken input in real-time."
+
+    def can_handle_post_prompt(self) -> bool:
+        """Return True: this plugin implements ``post_prompt``."""
+        return True
+
+    def post_prompt(self, prompt: PromptGenerator) -> PromptGenerator:
+        """Add the ``transcribe_audio`` command to *prompt* and return it."""
+        command_label = "Transcribe spoken input"
+        command_name = "transcribe_audio"
+        command_args = {"audio": "<audio>"}
+        prompt.add_command(command_label, command_name, command_args, transcribe_audio)
+        return prompt
+
+    # Add more methods as needed, such as can_handle_on_response, on_response, etc.
+
diff --git a/src/autogpt_plugins/speech_to_text/speech_to_text_plugin/speech_to_text_plugin.py b/src/autogpt_plugins/speech_to_text/speech_to_text_plugin/speech_to_text_plugin.py
@@ -0,0 +1,52 @@
+import os
+import pyaudio
+import io
+from google.cloud import speech_v1p1beta1 as speech
+from google.cloud.speech_v1p1beta1 import enums
+from google.cloud.speech_v1p1beta1 import types
+import autogpt
+
+# Fall back to the placeholder path only when the variable is not already
+# set, so a credentials path exported by the user or the deployment
+# environment is never overwritten at import time.
+os.environ.setdefault('GOOGLE_APPLICATION_CREDENTIALS', 'path/to/credentials.json')
+
+def transcribe_streaming(stream):
+    """Send audio chunks to the streaming recognizer and return a transcript.
+
+    Args:
+        stream: Iterable yielding chunks of audio bytes. The recognition
+            config built below declares the audio as LINEAR16 at 16000 Hz
+            with language code ``en-US``.
+
+    Returns:
+        The transcript of the first alternative of the first result that
+        carries any alternatives, or ``None`` when no response contains one.
+    """
+    client = speech.SpeechClient()
+    config = types.RecognitionConfig(
+        encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
+        sample_rate_hertz=16000,
+        language_code='en-US')
+    streaming_config = types.StreamingRecognitionConfig(config=config)
+
+    # Generator expression: each chunk is wrapped in a request lazily, only
+    # when the request iterator is advanced.
+    requests = (types.StreamingRecognizeRequest(audio_content=chunk) for chunk in stream)
+    responses = client.streaming_recognize(streaming_config, requests)
+
+    for response in responses:
+        for result in response.results:
+            # A result without alternatives has no transcript to offer; skip
+            # it rather than raising IndexError on alternatives[0].
+            if result.alternatives:
+                return result.alternatives[0].transcript
+    return None
+
+def record_audio():
+    """Yield audio chunks read from a PyAudio input stream, indefinitely.
+
+    The stream is opened with format ``pyaudio.paInt16``, 1 channel and a
+    rate of 16000 Hz; every yielded item is the result of reading ``CHUNK``
+    frames (one tenth of ``RATE``).
+
+    Yields:
+        The data returned by ``stream.read(CHUNK)``.
+
+    The stream and the PyAudio instance are released when the generator is
+    closed, garbage-collected, or exits because of an exception.
+    """
+    RATE = 16000
+    CHUNK = int(RATE / 10)
+    FORMAT = pyaudio.paInt16
+    CHANNELS = 1
+
+    p = pyaudio.PyAudio()
+    try:
+        stream = p.open(format=FORMAT, channels=CHANNELS, rate=RATE, input=True, frames_per_buffer=CHUNK)
+        try:
+            while True:
+                yield stream.read(CHUNK)
+        finally:
+            # Runs on GeneratorExit as well, so the audio device is not left
+            # open once the consumer stops iterating.
+            stream.stop_stream()
+            stream.close()
+    finally:
+        p.terminate()
+
+def process_transcribed_text(transcript):
+    """Pass *transcript* to ``autogpt.process_input`` and return its result.
+
+    NOTE(review): relies on the ``autogpt`` package exposing a
+    ``process_input`` callable; confirm against the installed version.
+    """
+    processed = autogpt.process_input(transcript)
+    return processed
+
+if __name__ == '__main__':
+    # Script entry point: record from the microphone, transcribe each chunk,
+    # hand non-empty transcripts to Auto-GPT and print both, until Ctrl+C.
+    # NOTE(review): every chunk produced by record_audio() is wrapped in its
+    # own BytesIO and sent through a separate transcribe_streaming() call;
+    # confirm that per-chunk requests are what is intended here.
+    print("Recording... Press Ctrl+C to stop.")
+    try:
+        for chunk in record_audio():
+            text = transcribe_streaming(io.BytesIO(chunk))
+            if not text:
+                continue
+            processed = process_transcribed_text(text)
+            print("Transcript:", text)
+            print("Processed:", processed)
+    except KeyboardInterrupt:
+        # Ctrl+C is the documented way to stop recording; exit quietly.
+        pass
diff --git a/src/autogpt_plugins/speech_to_text/speech_to_text_plugin/test_speech_to_text_plugin.py b/src/autogpt_plugins/speech_to_text/speech_to_text_plugin/test_speech_to_text_plugin.py
@@ -0,0 +1,42 @@
+import unittest
+from unittest.mock import patch, MagicMock
+import io
+import speech_to_text_plugin
+from google.cloud.speech_v1p1beta1 import types
+
+class TestSpeechToTextPlugin(unittest.TestCase):
+    """Unit tests for the helper functions in ``speech_to_text_plugin``."""
+
+    def test_transcribe_streaming(self):
+        # The mocked client returns one response with one alternative;
+        # transcribe_streaming must hand back that alternative's transcript.
+        expected = "This is a sample transcript."
+        audio = io.BytesIO(b'sample_audio_data')
+
+        alternative = types.SpeechRecognitionAlternative(transcript=expected)
+        result = types.StreamingRecognitionResult(alternatives=[alternative])
+        response = types.StreamingRecognizeResponse(results=[result])
+
+        with patch("speech_to_text_plugin.speech.SpeechClient") as mock_client:
+            fake_client = MagicMock()
+            fake_client.streaming_recognize.return_value = [response]
+            mock_client.return_value = fake_client
+            actual = speech_to_text_plugin.transcribe_streaming(audio)
+
+        self.assertEqual(actual, expected)
+
+    def test_process_transcribed_text(self):
+        # process_transcribed_text must return whatever the patched
+        # autogpt.process_input returns.
+        transcript = "This is a sample transcript."
+        expected = "This is a sample processed text."
+
+        with patch("speech_to_text_plugin.autogpt.process_input") as mock_process_input:
+            mock_process_input.return_value = expected
+            actual = speech_to_text_plugin.process_transcribed_text(transcript)
+
+        self.assertEqual(actual, expected)
+
+# Run the test suite when this file is executed directly.
+if __name__ == '__main__':
+ unittest.main()