
Commit

speech-to-bash-prompt
Jef808 committed Jan 8, 2024
1 parent 44b7c6a commit 8d6dfb2
Showing 7 changed files with 159 additions and 136 deletions.
6 changes: 5 additions & 1 deletion echo-crafter.el
@@ -28,6 +28,8 @@
(defvar microphone-stream-buffer nil
"Buffer for the output of the microphone stream.")

(defvar openai-model "gpt-4")

(defun start-microphone-stream ()
"Start microphone stream subprocess."
(interactive)
@@ -63,7 +65,9 @@
(message "Microphone stream finished with exit code %d" (process-exit-status process))))))

(defun send-output-to-openai ()
(setq openai-prompt-process (start-process-shell-command "openai-prompt-process" "*openai-prompt output*" "/home/jfa/projects/echo-crafter/run-make-prompt.sh"))
(setq openai-prompt-process (start-process-shell-command "openai-prompt-process"
"*openai-prompt output*"
"/home/jfa/projects/echo-crafter/run-make-prompt.sh"))
(set-process-sentinel openai-prompt-process 'openai-prompt-process-sentinel)
(send-buffer-contents-to-process microphone-stream-buffer openai-prompt-process))

2 changes: 0 additions & 2 deletions make-prompt/dumpf_s.py

This file was deleted.

56 changes: 0 additions & 56 deletions make-prompt/main.py

This file was deleted.

77 changes: 0 additions & 77 deletions make-prompt/shell-script.py

This file was deleted.

76 changes: 76 additions & 0 deletions make-prompt/shell.py
@@ -0,0 +1,76 @@
#!/usr/bin/env python

import argparse
import subprocess
import json
import sys
from openai import OpenAI


DEFAULT_MODEL = "gpt-4-1106-preview"

parser = argparse.ArgumentParser(description='Process some arguments.')
parser.add_argument('--model', type=str,
                    help='Model to use.',
                    default=DEFAULT_MODEL)


def make_payload(args, prompt):
    system_prompt = ("You will be assigned a user command. Your mission is to generate a"
                     " zsh shell script that, when executed in an Arch Linux environment,"
                     " will run the command.\nDo not explain yourself or output anything else.")
    example = [{"role": "user", "content": "Command: Give me the absolute path to the home directory."},
               {"role": "assistant", "content": '```shell\necho $HOME\n```'}]
    payload = {
        "model": args.model,
        "messages": [
            {"role": "system", "content": system_prompt}
        ]
    }
    payload['messages'].extend(example)
    payload['messages'].append({"role": "user", "content": f"Command: {prompt}"})
    return payload


def get_api_key():
    p_api_key = subprocess.run(["pass", "openai.com/api_key"],
                               capture_output=True)
    if not p_api_key.stdout:
        print("ERROR: Failed to retrieve openai.com/api_key pass entry",
              file=sys.stderr)
        sys.exit(3)
    return str(p_api_key.stdout, encoding="utf-8").strip()


def format_response(content):
    result = []
    between_backticks = False
    for line in content.split('\n'):
        if line.strip().startswith("```"):
            between_backticks = not between_backticks
            continue
        if between_backticks:
            result.append(line)
    response = '\n'.join(result) if result else content
    return response


if __name__ == '__main__':
    openai_client = OpenAI(api_key=get_api_key())

    args = parser.parse_args()

    prompt = input()

    payload = make_payload(args, prompt)

    print(f"[PAYLOAD]: {json.dumps(payload, indent=2)}", file=sys.stderr)

    response = openai_client.chat.completions.create(**payload)
    py_response = response.model_dump()

    print(f"[RESPONSE]: {json.dumps(py_response, indent=2)}", file=sys.stderr)

    content = py_response['choices'][0]['message']['content']

    print(format_response(content))
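
Taken on its own, shell.py reads a single natural-language command on stdin, prints the generated snippet on stdout, and sends its [PAYLOAD]/[RESPONSE] diagnostics to stderr. A minimal sketch of exercising it standalone (assuming the repository path used elsewhere in this commit, the script marked executable, and a working `pass openai.com/api_key` entry):

```python
# Illustrative only: feed make-prompt/shell.py a typed command the way the
# speech-to-text stage would, then read the generated snippet back.
import subprocess

proc = subprocess.run(
    ["/home/jfa/projects/echo-crafter/make-prompt/shell.py"],
    input="Give me the absolute path to the home directory.\n",
    capture_output=True,
    text=True,
)
print(proc.stdout.strip())  # with the few-shot example above, likely: echo $HOME
print(proc.stderr, end="")  # [PAYLOAD] / [RESPONSE] diagnostics arrive on stderr
```
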
3 changes: 3 additions & 0 deletions speech-command
@@ -0,0 +1,3 @@
#!/usr/bin/env sh

/home/jfa/projects/echo-crafter/.venv/bin/python /home/jfa/projects/echo-crafter/speech-to-text/speech_to_text.py | /home/jfa/projects/echo-crafter/make-prompt/shell.py | xargs -I {} xdotool type "{}"
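
The speech-command entry point chains the three stages: transcribe a spoken command, convert the transcription into a shell snippet, then type the snippet into the focused window. A rough Python rendering of the same data flow, for reference only (same paths as the script above; xdotool is assumed to be installed, and the per-line xargs behaviour of the original is not reproduced):

```python
# Illustrative Python equivalent of the speech-command pipeline above.
import subprocess

VENV_PY = "/home/jfa/projects/echo-crafter/.venv/bin/python"
STT = "/home/jfa/projects/echo-crafter/speech-to-text/speech_to_text.py"
MAKE_PROMPT = "/home/jfa/projects/echo-crafter/make-prompt/shell.py"

# Stage 1: block until the wake word is heard and a command is transcribed.
transcription = subprocess.run([VENV_PY, STT], capture_output=True, text=True).stdout

# Stage 2: turn the transcription into a shell snippet via the OpenAI API.
snippet = subprocess.run([MAKE_PROMPT], input=transcription,
                         capture_output=True, text=True).stdout.strip()

# Stage 3: type the snippet into the currently focused window.
subprocess.run(["xdotool", "type", snippet])
```
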
75 changes: 75 additions & 0 deletions speech-to-text/speech_to_text.py
@@ -0,0 +1,75 @@
#!/usr/bin/env python3
from transformers import pipeline
from transformers.pipelines.audio_utils import ffmpeg_microphone_live
import numpy as np
import torch
import sys

SILENCE_THRESHOLD = 0.01  # Threshold for silence detection
SILENCE_DURATION = 1.2    # Duration of silence to consider as end-of-speech (seconds)
CHUNK = 3200              # Number of audio frames per buffer


device = "cuda:0" if torch.cuda.is_available() else "cpu"
classifier = pipeline(
    "audio-classification", model="mit/ast-finetuned-speech-commands-v2", device=device
)


def launch_fn(
    wake_word="marvin",
    prob_threshold=0.8,
    chunk_length_s=2.0,
    stream_chunk_s=0.2,
    debug=False,
):
    if wake_word not in classifier.model.config.label2id.keys():
        raise ValueError(
            f"Wake word {wake_word} not in set of valid class labels, pick a wake word in the set {classifier.model.config.label2id.keys()}."
        )

    sampling_rate = classifier.feature_extractor.sampling_rate

    mic = ffmpeg_microphone_live(
        sampling_rate=sampling_rate,
        chunk_length_s=chunk_length_s,
        stream_chunk_s=stream_chunk_s,
    )

    print("Listening for wake word...", file=sys.stderr)
    for prediction in classifier(mic):
        prediction = prediction[0]
        if debug:
            print(prediction, file=sys.stderr)
        if prediction["label"] == wake_word:
            if prediction["score"] > prob_threshold:
                return True


def transcribe(chunk_length_s=10.0, stream_chunk_s=1.0, *, debug=False):
    transcriber = pipeline(
        "automatic-speech-recognition", model="openai/whisper-small.en", device=device
    )

    sampling_rate = transcriber.feature_extractor.sampling_rate

    mic = ffmpeg_microphone_live(
        sampling_rate=sampling_rate,
        chunk_length_s=chunk_length_s,
        stream_chunk_s=stream_chunk_s,
    )

    print("Listening for command...", file=sys.stderr)
    for item in transcriber(mic, generate_kwargs={"max_new_tokens": 128}):
        if debug:
            print(item["text"], end="\n", file=sys.stderr)
        if not item["partial"][0]:
            break

    return item["text"]


if __name__ == '__main__':
    launch_fn(debug=False)
    transcription = transcribe()
    print(transcription)
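
launch_fn rejects any wake word that is not among the classifier's class labels, so it can help to list the valid choices before swapping out "marvin". A small sketch, assuming the same audio-classification checkpoint loaded above:

```python
# Illustrative only: print the labels the wake-word classifier knows about,
# i.e. the valid values for launch_fn's wake_word argument.
from transformers import pipeline

classifier = pipeline(
    "audio-classification", model="mit/ast-finetuned-speech-commands-v2"
)
print(sorted(classifier.model.config.label2id))
```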
