Initial version of voice interaction with ChatGPT

This commit is contained in:
Lars Haferkamp 2023-04-21 22:17:24 +02:00
parent 6fdab94f1b
commit c6e0a995be
10 changed files with 241 additions and 0 deletions

25
chatbot/chat/chatgpt.py Normal file
View file

@ -0,0 +1,25 @@
import os
import openai
def chat(message, openai_key, shortAnswer=True):
    """Ask ChatGPT (gpt-3.5-turbo) a question and return its first answer.

    Parameters:
        message: the user prompt to send.
        openai_key: OpenAI API key used to authenticate the request.
        shortAnswer: when True, append a German request for a brief reply.

    Returns:
        The content of the first choice in the chat completion.
    """
    print("###################")
    openai.api_key = openai_key
    # Optionally ask for brevity (German: "a short answer, please!").
    prompt = (message + " - eine kurze Antwort bitte!") if shortAnswer else message
    print(f"Asking ChatGPT for an answer to: \n {prompt} \n ...")
    completion = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": prompt}],
    )
    first_answer = completion.choices[0].message.content
    print(f"Got answer from ChatGPT:\n {first_answer}")
    return first_answer
if __name__ == "__main__":
    # Manual test entry point.
    # BUG FIX: chat() requires an API key as its second positional argument;
    # the original call omitted it and raised TypeError. Read it from the
    # environment so the demo actually runs.
    answer = chat("Erzähle mir was über Murnau", os.environ["OPENAI_KEY"])
    print(answer)

View file

@ -0,0 +1,6 @@
{
"OPENAI_KEY": "xxxxxxxxx",
"MIC_DEVICE_ID": 0,
"SECONDS_RECORDING": 5,
"ESPEAK_VOICE": "de+f4"
}

21
chatbot/chat/main.py Normal file
View file

@ -0,0 +1,21 @@
import os
import json
import speech_recognition
import speech_synthesis
import chatgpt
import record_audio
if __name__ == "__main__":
    # Voice-assistant pipeline: record -> transcribe -> ask ChatGPT -> speak.
    base_dir = os.path.dirname(__file__)
    with open(os.path.join(base_dir, "config.json")) as config_file:
        cfg = json.load(config_file)
    api_key = cfg["OPENAI_KEY"]
    record_audio.record(
        base_dir,
        device_id=cfg["MIC_DEVICE_ID"],
        max_recording_time_s=cfg["SECONDS_RECORDING"],
    )
    question = speech_recognition.transcribe(base_dir, api_key)
    reply = chatgpt.chat(question, api_key)
    # Idea: recognize the language and pass it to the speech synthesizer.
    # Unfortunately the detected language is currently not returned by the OpenAI API.
    speech_synthesis.speak(reply, voice=cfg["ESPEAK_VOICE"])

View file

@ -0,0 +1,54 @@
import os
import pyaudio
import wave
def record(dirname, device_id=0, max_recording_time_s=3):
    """Record mono 16-bit 44.1 kHz audio from a microphone into recorded.wav.

    Parameters:
        dirname: directory in which "recorded.wav" is written.
        device_id: PyAudio input-device index (list them with get_mics()).
        max_recording_time_s: recording duration in seconds.
    """
    print("###################")
    fname = os.path.join(dirname, "recorded.wav")

    form_1 = pyaudio.paInt16   # 16-bit resolution
    chans = 1                  # mono
    samp_rate = 44100          # 44.1 kHz sampling rate
    chunk = 4096               # samples per buffer
    record_secs = max_recording_time_s

    audio = pyaudio.PyAudio()
    # Query the sample width while the PyAudio instance is guaranteed valid.
    samp_width = audio.get_sample_size(form_1)
    frames = []
    try:
        stream = audio.open(format=form_1, rate=samp_rate, channels=chans,
                            input_device_index=device_id, input=True,
                            frames_per_buffer=chunk)
        try:
            print(f"Recording via microphone for {max_recording_time_s} seconds")
            # Read enough chunks to cover the requested duration.
            for _ in range(int((samp_rate / chunk) * record_secs)):
                frames.append(stream.read(chunk))
            print("Finished recording")
        finally:
            # BUG FIX: stop/close the stream even if a read fails mid-recording.
            stream.stop_stream()
            stream.close()
    finally:
        # BUG FIX: always release the PortAudio resources.
        audio.terminate()

    # BUG FIX: context manager guarantees the .wav file is closed on error.
    with wave.open(fname, 'wb') as wavefile:
        wavefile.setnchannels(chans)
        wavefile.setsampwidth(samp_width)
        wavefile.setframerate(samp_rate)
        wavefile.writeframes(b''.join(frames))
def get_mics():
    """Print the index and name of every audio device known to PyAudio."""
    pa = pyaudio.PyAudio()
    for idx in range(pa.get_device_count()):
        device_name = pa.get_device_info_by_index(idx).get('name')
        print(f"{idx}: {device_name}")
if __name__ == "__main__":
    # Manual test: list the available input devices, then record 5 s from device 0.
    get_mics()
    here = os.path.dirname(__file__)
    record(here, device_id=0, max_recording_time_s=5)

View file

@ -0,0 +1,25 @@
import os
import json
import openai
def transcribe(dirname, openai_key, file = "recorded.wav"):
    """Transcribe an audio file via OpenAI Whisper and return the text.

    Parameters:
        dirname: directory containing the audio file.
        openai_key: OpenAI API key used to authenticate the request.
        file: audio file name inside *dirname* (default "recorded.wav").

    Returns:
        The recognized text from the Whisper transcription.
    """
    print("###################")
    openai.api_key = openai_key
    fname = os.path.join(dirname, file)
    print("Transcribing audio via OpenAI Whisper ...")
    # BUG FIX: open the file with a context manager so the handle is always
    # closed (the original leaked the open file object).
    with open(fname, "rb") as audio_file:
        transcript = openai.Audio.transcribe("whisper-1", audio_file)
    recognized_text = transcript.text
    print(f"Recognized text: \n > {recognized_text}")
    return recognized_text
if __name__ == "__main__":
    # Manual test: transcribe a sample recording using the key from config.json.
    base_dir = os.path.dirname(__file__)
    with open(os.path.join(base_dir, "config.json")) as config_file:
        cfg = json.load(config_file)
    result = transcribe(base_dir, cfg['OPENAI_KEY'], file="test.m4a")
    print(result)

View file

@ -0,0 +1,14 @@
import subprocess
def speak(text, voice="de+f4"):
    """Speak *text* aloud using the espeak command-line synthesizer.

    Parameters:
        text: the text to synthesize (here: an answer generated by ChatGPT).
        voice: espeak voice identifier, e.g. "de+f4".
    """
    print("###################")
    print("Generating audio from text")
    # SECURITY FIX: the original interpolated *text* into a shell=True command
    # string, so quotes or shell metacharacters in the (untrusted, model-
    # generated) text could inject arbitrary commands. Passing an argument
    # list with the default shell=False makes espeak receive the text verbatim.
    cmd = ["espeak", f"-v{voice}", "-p20", "-s150", text]
    process_return = subprocess.call(cmd)
    #print(process_return)
if __name__ == "__main__":
    # Manual test: speak a German sample sentence about Murnau using the
    # "klatt2" variant of the German voice.
    # NOTE(review): the backslash continues the *string literal* across the
    # line break, so any leading whitespace on the next line becomes part of
    # the spoken text — confirm the intended spacing.
    speak("Murnau ist eine kleine Stadt in Bayern, Deutschland, bekannt für seine atemberaubende Natur, \
insbesondere den Murnauer Moos Nationalpark, sowie für seine malerische Architektur und deutsche Kultur.",
    voice="de+klatt2")