Significant-Gravitas · swooshcode · May 6, 2023 · May 6, 2023 · May 6, 2023 · May 6, 2023
diff --git a/.DS_Store b/.DS_Store
diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS
@@ -8,6 +8,7 @@
 /src/autogpt_plugins/news_search @PalAditya
 /src/autogpt_plugins/random_values @sidewaysthought
 /src/autogpt_plugins/scenex @delgermurun
+/src/autogpt_plugins/speech_to_text @swooshcode
 /src/autogpt_plugins/telegram @wladastic
 /src/autogpt_plugins/twitter @desojo
 /src/autogpt_plugins/wikipedia_search @pierluigi-failla

diff --git a/src/.DS_Store b/src/.DS_Store
diff --git a/src/autogpt_plugins/.DS_Store b/src/autogpt_plugins/.DS_Store
diff --git a/src/autogpt_plugins/speech_to_text/.DS_Store b/src/autogpt_plugins/speech_to_text/.DS_Store
diff --git a/src/autogpt_plugins/speech_to_text/README.md b/src/autogpt_plugins/speech_to_text/README.md
@@ -0,0 +1,65 @@
+# Auto-GPT Speech-to-Text Plugin
+This repository contains various plugins developed for use with the AutoGPT model. These plugins extend the functionality of AutoGPT by providing additional features, such as speech-to-text transcription, integration with external APIs, and more.
+
+## Table of Contents
+
+1. [Speech-to-Text Plugin](#speech-to-text-plugin)
+2. [Installation](#installation)
+3. [Contributors](#contributors)
+4. [License](#license)
+
+## Speech-to-Text Plugin
+
+The speech-to-text plugin allows users to transcribe spoken input in real-time and feed the transcribed text into the AutoGPT model for processing. This plugin uses the Google Cloud Speech-to-Text API for transcription and PyAudio for real-time audio recording from the user's microphone.
+
+### Features
+
+- Real-time audio recording from the user's microphone
+- Transcription of spoken input using Google Cloud Speech-to-Text API
+- Integration with the AutoGPT model for processing transcribed text
+
+### Usage
+
+1. Set up the Google Cloud Speech-to-Text API and obtain your API credentials as a JSON file.
+2. Update the `speech_to_text_plugin.py` file to use the correct path to your API credentials.
+3. Install the required dependencies: `pip install google-cloud-speech pyaudio`
+4. Run the `speech_to_text_plugin.py` file to start recording and transcribing audio input.
+
+## Installation
+
+To install the plugins, follow these steps:
+
+1. Clone this repository: `git clone https://github.com/Significant-Gravitas/Auto-GPT-Plugins`
+2. Navigate to the `src/autogpt_plugins` directory.
+3. Install the required dependencies for each plugin as specified in their respective README files or source code comments.
+
+## Contributors
+
+Nigel Phillips a.k.a. Swooshcode
+Software Developer
+Founder of Frame Tech Solutions Ltd., Co. 框架技術解決方案
+For inquiries: https://tinyurl.com/nigelphillips
+
+## License
+
+MIT License
+
+Copyright (c) 2023 Toran Bruce Richards
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/src/autogpt_plugins/speech_to_text/__init__.py b/src/autogpt_plugins/speech_to_text/__init__.py
@@ -0,0 +1,34 @@
+from typing import Any, Dict, List, Optional, Tuple, TypedDict, TypeVar
+
+from auto_gpt_plugin_template import AutoGPTPluginTemplate
+from .speech_to_text_plugin import transcribe_audio
+
+PromptGenerator = TypeVar("PromptGenerator")
+
+class SpeechToTextPlugin(AutoGPTPluginTemplate):
+ """
+ This is the Auto-GPT Speech-to-Text plugin.
+ """
+
+ def __init__(self):
+ super().__init__()
+ self._name = "Auto-GPT-Speech-to-Text-Plugin"
+ self._version = "0.0.1"
+ self._description = "Auto-GPT Speech-to-Text Plugin: Transcribe spoken input in real-time."
+
+ def can_handle_post_prompt(self) -> bool:
+ return True
+
+ def post_prompt(self, prompt: PromptGenerator) -> PromptGenerator:
+ prompt.add_command(
+ "Transcribe spoken input",
+ "transcribe_audio",
+ {
+ "audio": "<audio>",
+ },
+ transcribe_audio,
+ )
+ return prompt
+
+ # Add more methods as needed, such as can_handle_on_response, on_response, etc.
+
diff --git a/src/autogpt_plugins/speech_to_text/speech_to_text_plugin/speech_to_text_plugin.py b/src/autogpt_plugins/speech_to_text/speech_to_text_plugin/speech_to_text_plugin.py
@@ -0,0 +1,36 @@
+from typing import Any, Dict, List, Optional, Tuple, TypedDict, TypeVar
+
+from auto_gpt_plugin_template import AutoGPTPluginTemplate
+from .speech_to_text_plugin import transcribe_streaming, record_audio, process_transcribed_text
+import io
+
+PromptGenerator = TypeVar("PromptGenerator")
+
+class SpeechToTextPlugin(AutoGPTPluginTemplate):
+ """
+ This is the Auto-GPT Speech-to-Text plugin.
+ """
+
+ def __init__(self):
+ super().__init__()
+ self._name = "Auto-GPT-Speech-to-Text-Plugin"
+ self._version = "0.0.1"
+ self._description = "Auto-GPT Speech-to-Text Plugin: Transcribe spoken input in real-time."
+
+ def can_handle_post_prompt(self) -> bool:
+ return True
+
+ def post_prompt(self, prompt: PromptGenerator) -> PromptGenerator:
+ # Record audio from the built-in microphone
+ audio_data = next(record_audio())
+
+ # Transcribe the audio data using Google Speech-to-Text
+ transcript = transcribe_streaming(io.BytesIO(audio_data))
+ if transcript:
+ # Process the transcribed text
+ processed_text = process_transcribed_text(transcript)
+
+ # Add the processed text to the prompt
+ prompt.add_text(processed_text)
+
+ return prompt
diff --git a/src/autogpt_plugins/speech_to_text/speech_to_text_plugin/test_speech_to_text_plugin.py b/src/autogpt_plugins/speech_to_text/speech_to_text_plugin/test_speech_to_text_plugin.py
@@ -0,0 +1,42 @@
+import unittest
+from unittest.mock import patch, MagicMock
+import io
+import speech_to_text_plugin
+from google.cloud.speech_v1p1beta1 import types
+
+class TestSpeechToTextPlugin(unittest.TestCase):
+
+ def test_transcribe_streaming(self):
+ sample_audio = io.BytesIO(b'sample_audio_data')
+ sample_transcript = "This is a sample transcript."
+
+ with patch("speech_to_text_plugin.speech.SpeechClient") as mock_client:
+ mock_instance = MagicMock()
+ mock_instance.streaming_recognize.return_value = [
+ types.StreamingRecognizeResponse(
+ results=[
+ types.StreamingRecognitionResult(
+ alternatives=[
+ types.SpeechRecognitionAlternative(transcript=sample_transcript)
+ ]
+ )
+ ]
+ )
+ ]
+ mock_client.return_value = mock_instance
+ transcript = speech_to_text_plugin.transcribe_streaming(sample_audio)
+
+ self.assertEqual(transcript, sample_transcript)
+
+ def test_process_transcribed_text(self):
+ sample_transcript = "This is a sample transcript."
+ sample_processed_text = "This is a sample processed text."
+
+ with patch("speech_to_text_plugin.autogpt.process_input") as mock_process_input:
+ mock_process_input.return_value = sample_processed_text
+ processed_text = speech_to_text_plugin.process_transcribed_text(sample_transcript)
+
+ self.assertEqual(processed_text, sample_processed_text)
+
+if __name__ == '__main__':
+ unittest.main()