
Commit

speech-to-bash-prompt
Jef808 committed Jan 8, 2024
1 parent 44b7c6a commit 8d6dfb2
Showing 7 changed files with 159 additions and 136 deletions.
6 changes: 5 additions & 1 deletion echo-crafter.el
@@ -28,6 +28,8 @@
(defvar microphone-stream-buffer nil
"Buffer for the output of the microphone stream.")

(defvar openai-model "gpt-4")

(defun start-microphone-stream ()
"Start microphone stream subprocess."
(interactive)
@@ -63,7 +65,9 @@
(message "Microphone stream finished with exit code %d" (process-exit-status process))))))

(defun send-output-to-openai ()
(setq openai-prompt-process (start-process-shell-command "openai-prompt-process" "*openai-prompt output*" "/home/jfa/projects/echo-crafter/run-make-prompt.sh"))
(setq openai-prompt-process (start-process-shell-command "openai-prompt-process"
"*openai-prompt output*"
"/home/jfa/projects/echo-crafter/run-make-prompt.sh"))
(set-process-sentinel openai-prompt-process 'openai-prompt-process-sentinel)
(send-buffer-contents-to-process microphone-stream-buffer openai-prompt-process))

2 changes: 0 additions & 2 deletions make-prompt/dumpf_s.py

This file was deleted.

56 changes: 0 additions & 56 deletions make-prompt/main.py

This file was deleted.

77 changes: 0 additions & 77 deletions make-prompt/shell-script.py

This file was deleted.

76 changes: 76 additions & 0 deletions make-prompt/shell.py
@@ -0,0 +1,76 @@
#!/usr/bin/env python

import argparse
import subprocess
import json
import sys
from openai import OpenAI


DEFAULT_MODEL = "gpt-4-1106-preview"

parser = argparse.ArgumentParser(description='Process some arguments.')
parser.add_argument('--model', type=str,
                    help='Model to use.',
                    default=DEFAULT_MODEL)


def make_payload(args, prompt):
    system_prompt = ("You will be assigned a user command. Your mission is to generate a"
                     " zsh shell script that, when executed in an Arch Linux environment,"
                     " will run the command.\nDo not explain yourself or output anything else.")
    example = [{"role": "user", "content": "Command: Give me the absolute path to the home directory."},
               {"role": "assistant", "content": '```shell\necho $HOME\n```'}]
    payload = {
        "model": args.model,
        "messages": [
            {"role": "system", "content": system_prompt}
        ]
    }
    payload['messages'].extend(example)
    payload['messages'].append({"role": "user", "content": f"Command: {prompt}"})
    return payload


def get_api_key():
    p_api_key = subprocess.run(["pass", "openai.com/api_key"],
                               capture_output=True)
    if not p_api_key.stdout:
        print("ERROR: Failed to retrieve openai.com/api_key pass entry",
              file=sys.stderr)
        sys.exit(3)
    return str(p_api_key.stdout, encoding="utf-8").strip()


def format_response(content):
    result = []
    between_backticks = False
    for line in content.split('\n'):
        if line.strip().startswith("```"):
            between_backticks = not between_backticks
            continue
        if between_backticks:
            result.append(line)
    response = '\n'.join(result) if result else content
    return response


if __name__ == '__main__':
    openai_client = OpenAI(api_key=get_api_key())

    args = parser.parse_args()

    prompt = input()

    payload = make_payload(args, prompt)

    print(f"[PAYLOAD]: {json.dumps(payload, indent=2)}", file=sys.stderr)

    response = openai_client.chat.completions.create(**payload)
    py_response = response.model_dump()

    print(f"[RESPONSE]: {json.dumps(py_response, indent=2)}", file=sys.stderr)

    content = py_response['choices'][0]['message']['content']

    print(format_response(content))
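
Taken on its own, shell.py reads a single natural-language command on stdin, prints the generated snippet on stdout, and sends its [PAYLOAD]/[RESPONSE] diagnostics to stderr. A minimal sketch of exercising it standalone (assuming the repository path used elsewhere in this commit, the script marked executable, and a working `pass openai.com/api_key` entry):

```python
# Illustrative only: feed make-prompt/shell.py a typed command the way the
# speech-to-text stage would, then read the generated snippet back.
import subprocess

proc = subprocess.run(
    ["/home/jfa/projects/echo-crafter/make-prompt/shell.py"],
    input="Give me the absolute path to the home directory.\n",
    capture_output=True,
    text=True,
)
print(proc.stdout.strip())  # with the few-shot example above, likely: echo $HOME
print(proc.stderr, end="")  # [PAYLOAD] / [RESPONSE] diagnostics arrive on stderr
```
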
3 changes: 3 additions & 0 deletions speech-command
@@ -0,0 +1,3 @@
#!/usr/bin/env sh

/home/jfa/projects/echo-crafter/.venv/bin/python /home/jfa/projects/echo-crafter/speech-to-text/speech_to_text.py | /home/jfa/projects/echo-crafter/make-prompt/shell.py | xargs -I {} xdotool type "{}"
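
The speech-command entry point chains the three stages: transcribe a spoken command, convert the transcription into a shell snippet, then type the snippet into the focused window. A rough Python rendering of the same data flow, for reference only (same paths as the script above; xdotool is assumed to be installed, and the per-line xargs behaviour of the original is not reproduced):

```python
# Illustrative Python equivalent of the speech-command pipeline above.
import subprocess

VENV_PY = "/home/jfa/projects/echo-crafter/.venv/bin/python"
STT = "/home/jfa/projects/echo-crafter/speech-to-text/speech_to_text.py"
MAKE_PROMPT = "/home/jfa/projects/echo-crafter/make-prompt/shell.py"

# Stage 1: block until the wake word is heard and a command is transcribed.
transcription = subprocess.run([VENV_PY, STT], capture_output=True, text=True).stdout

# Stage 2: turn the transcription into a shell snippet via the OpenAI API.
snippet = subprocess.run([MAKE_PROMPT], input=transcription,
                         capture_output=True, text=True).stdout.strip()

# Stage 3: type the snippet into the currently focused window.
subprocess.run(["xdotool", "type", snippet])
```
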
75 changes: 75 additions & 0 deletions speech-to-text/speech_to_text.py
@@ -0,0 +1,75 @@
#!/usr/bin/env python3
from transformers import pipeline
from transformers.pipelines.audio_utils import ffmpeg_microphone_live
import numpy as np
import torch
import sys

SILENCE_THRESHOLD = 0.01  # Threshold for silence detection
SILENCE_DURATION = 1.2    # Duration of silence to consider as end-of-speech (seconds)
CHUNK = 3200              # Number of audio frames per buffer


device = "cuda:0" if torch.cuda.is_available() else "cpu"
classifier = pipeline(
    "audio-classification", model="mit/ast-finetuned-speech-commands-v2", device=device
)


def launch_fn(
    wake_word="marvin",
    prob_threshold=0.8,
    chunk_length_s=2.0,
    stream_chunk_s=0.2,
    debug=False,
):
    if wake_word not in classifier.model.config.label2id.keys():
        raise ValueError(
            f"Wake word {wake_word} not in set of valid class labels, pick a wake word in the set {classifier.model.config.label2id.keys()}."
        )

    sampling_rate = classifier.feature_extractor.sampling_rate

    mic = ffmpeg_microphone_live(
        sampling_rate=sampling_rate,
        chunk_length_s=chunk_length_s,
        stream_chunk_s=stream_chunk_s,
    )

    print("Listening for wake word...", file=sys.stderr)
    for prediction in classifier(mic):
        prediction = prediction[0]
        if debug:
            print(prediction, file=sys.stderr)
        if prediction["label"] == wake_word:
            if prediction["score"] > prob_threshold:
                return True


def transcribe(chunk_length_s=10.0, stream_chunk_s=1.0, *, debug=False):
    transcriber = pipeline(
        "automatic-speech-recognition", model="openai/whisper-small.en", device=device
    )

    sampling_rate = transcriber.feature_extractor.sampling_rate

    mic = ffmpeg_microphone_live(
        sampling_rate=sampling_rate,
        chunk_length_s=chunk_length_s,
        stream_chunk_s=stream_chunk_s,
    )

    print("Listening for command...", file=sys.stderr)
    for item in transcriber(mic, generate_kwargs={"max_new_tokens": 128}):
        if debug:
            print(item["text"], end="\n", file=sys.stderr)
        if not item["partial"][0]:
            break

    return item["text"]


if __name__ == '__main__':
    launch_fn(debug=False)
    transcription = transcribe()
    print(transcription)
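
launch_fn rejects any wake word that is not among the classifier's class labels, so it can help to list the valid choices before swapping out "marvin". A small sketch, assuming the same audio-classification checkpoint loaded above:

```python
# Illustrative only: print the labels the wake-word classifier knows about,
# i.e. the valid values for launch_fn's wake_word argument.
from transformers import pipeline

classifier = pipeline(
    "audio-classification", model="mit/ast-finetuned-speech-commands-v2"
)
print(sorted(classifier.model.config.label2id))
```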
