Initial version of voice interaction with ChatGPT

This commit is contained in:
Lars Haferkamp 2023-04-21 22:17:24 +02:00
parent 6fdab94f1b
commit c6e0a995be
10 changed files with 241 additions and 0 deletions

25
chatbot/chat/chatgpt.py Normal file
View file

@ -0,0 +1,25 @@
import os
import openai
def chat(message, openai_key, shortAnswer=True):
    """Ask ChatGPT (gpt-3.5-turbo) a question and return its first answer.

    Parameters:
        message: the user prompt to send.
        openai_key: OpenAI API key used to authenticate the request.
        shortAnswer: when True, append a German request for a brief reply.

    Returns:
        The content of the first choice in the chat completion.
    """
    print("###################")
    openai.api_key = openai_key
    # Optionally ask for brevity (German: "a short answer, please!").
    prompt = (message + " - eine kurze Antwort bitte!") if shortAnswer else message
    print(f"Asking ChatGPT for an answer to: \n {prompt} \n ...")
    completion = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": prompt}],
    )
    first_answer = completion.choices[0].message.content
    print(f"Got answer from ChatGPT:\n {first_answer}")
    return first_answer
if __name__ == "__main__":
    # Manual test entry point.
    # BUG FIX: chat() requires an API key as its second positional argument;
    # the original call omitted it and raised TypeError. Read it from the
    # environment so the demo actually runs.
    answer = chat("Erzähle mir was über Murnau", os.environ["OPENAI_KEY"])
    print(answer)

View file

@ -0,0 +1,6 @@
{
"OPENAI_KEY": "xxxxxxxxx",
"MIC_DEVICE_ID": 0,
"SECONDS_RECORDING": 5,
"ESPEAK_VOICE": "de+f4"
}

21
chatbot/chat/main.py Normal file
View file

@ -0,0 +1,21 @@
import os
import json
import speech_recognition
import speech_synthesis
import chatgpt
import record_audio
if __name__ == "__main__":
    # Voice-assistant pipeline: record -> transcribe -> ask ChatGPT -> speak.
    base_dir = os.path.dirname(__file__)
    with open(os.path.join(base_dir, "config.json")) as config_file:
        cfg = json.load(config_file)
    api_key = cfg["OPENAI_KEY"]
    record_audio.record(
        base_dir,
        device_id=cfg["MIC_DEVICE_ID"],
        max_recording_time_s=cfg["SECONDS_RECORDING"],
    )
    question = speech_recognition.transcribe(base_dir, api_key)
    reply = chatgpt.chat(question, api_key)
    # Idea: recognize the language and pass it to the speech synthesizer.
    # Unfortunately the detected language is currently not returned by the OpenAI API.
    speech_synthesis.speak(reply, voice=cfg["ESPEAK_VOICE"])

View file

@ -0,0 +1,54 @@
import os
import pyaudio
import wave
def record(dirname, device_id=0, max_recording_time_s=3):
    """Record mono 16-bit 44.1 kHz audio from a microphone into recorded.wav.

    Parameters:
        dirname: directory in which "recorded.wav" is written.
        device_id: PyAudio input-device index (list them with get_mics()).
        max_recording_time_s: recording duration in seconds.
    """
    print("###################")
    fname = os.path.join(dirname, "recorded.wav")

    form_1 = pyaudio.paInt16   # 16-bit resolution
    chans = 1                  # mono
    samp_rate = 44100          # 44.1 kHz sampling rate
    chunk = 4096               # samples per buffer
    record_secs = max_recording_time_s

    audio = pyaudio.PyAudio()
    # Query the sample width while the PyAudio instance is guaranteed valid.
    samp_width = audio.get_sample_size(form_1)
    frames = []
    try:
        stream = audio.open(format=form_1, rate=samp_rate, channels=chans,
                            input_device_index=device_id, input=True,
                            frames_per_buffer=chunk)
        try:
            print(f"Recording via microphone for {max_recording_time_s} seconds")
            # Read enough chunks to cover the requested duration.
            for _ in range(int((samp_rate / chunk) * record_secs)):
                frames.append(stream.read(chunk))
            print("Finished recording")
        finally:
            # BUG FIX: stop/close the stream even if a read fails mid-recording.
            stream.stop_stream()
            stream.close()
    finally:
        # BUG FIX: always release the PortAudio resources.
        audio.terminate()

    # BUG FIX: context manager guarantees the .wav file is closed on error.
    with wave.open(fname, 'wb') as wavefile:
        wavefile.setnchannels(chans)
        wavefile.setsampwidth(samp_width)
        wavefile.setframerate(samp_rate)
        wavefile.writeframes(b''.join(frames))
def get_mics():
    """Print the index and name of every audio device known to PyAudio."""
    pa = pyaudio.PyAudio()
    for idx in range(pa.get_device_count()):
        device_name = pa.get_device_info_by_index(idx).get('name')
        print(f"{idx}: {device_name}")
if __name__ == "__main__":
    # Manual test: list the available input devices, then record 5 s from device 0.
    get_mics()
    here = os.path.dirname(__file__)
    record(here, device_id=0, max_recording_time_s=5)

View file

@ -0,0 +1,25 @@
import os
import json
import openai
def transcribe(dirname, openai_key, file = "recorded.wav"):
    """Transcribe an audio file via OpenAI Whisper and return the text.

    Parameters:
        dirname: directory containing the audio file.
        openai_key: OpenAI API key used to authenticate the request.
        file: audio file name inside *dirname* (default "recorded.wav").

    Returns:
        The recognized text from the Whisper transcription.
    """
    print("###################")
    openai.api_key = openai_key
    fname = os.path.join(dirname, file)
    print("Transcribing audio via OpenAI Whisper ...")
    # BUG FIX: open the file with a context manager so the handle is always
    # closed (the original leaked the open file object).
    with open(fname, "rb") as audio_file:
        transcript = openai.Audio.transcribe("whisper-1", audio_file)
    recognized_text = transcript.text
    print(f"Recognized text: \n > {recognized_text}")
    return recognized_text
if __name__ == "__main__":
    # Manual test: transcribe a sample recording using the key from config.json.
    base_dir = os.path.dirname(__file__)
    with open(os.path.join(base_dir, "config.json")) as config_file:
        cfg = json.load(config_file)
    result = transcribe(base_dir, cfg['OPENAI_KEY'], file="test.m4a")
    print(result)

View file

@ -0,0 +1,14 @@
import subprocess
def speak(text, voice="de+f4"):
    """Speak *text* aloud using the espeak command-line synthesizer.

    Parameters:
        text: the text to synthesize (here: an answer generated by ChatGPT).
        voice: espeak voice identifier, e.g. "de+f4".
    """
    print("###################")
    print("Generating audio from text")
    # SECURITY FIX: the original interpolated *text* into a shell=True command
    # string, so quotes or shell metacharacters in the (untrusted, model-
    # generated) text could inject arbitrary commands. Passing an argument
    # list with the default shell=False makes espeak receive the text verbatim.
    cmd = ["espeak", f"-v{voice}", "-p20", "-s150", text]
    process_return = subprocess.call(cmd)
    #print(process_return)
if __name__ == "__main__":
    # Manual test: speak a German sample sentence about Murnau using the
    # "klatt2" variant of the German voice.
    # NOTE(review): the backslash continues the *string literal* across the
    # line break, so any leading whitespace on the next line becomes part of
    # the spoken text — confirm the intended spacing.
    speak("Murnau ist eine kleine Stadt in Bayern, Deutschland, bekannt für seine atemberaubende Natur, \
insbesondere den Murnauer Moos Nationalpark, sowie für seine malerische Architektur und deutsche Kultur.",
    voice="de+klatt2")