initial version of voice interaction with ChatGPT
This commit is contained in:
parent
6fdab94f1b
commit
c6e0a995be
10 changed files with 241 additions and 0 deletions
12
chatbot/.gitignore
vendored
Normal file
12
chatbot/.gitignore
vendored
Normal file
|
@ -0,0 +1,12 @@
|
|||
# Python
|
||||
venv
|
||||
__pycache__
|
||||
|
||||
# Project specific
|
||||
config.json
|
||||
|
||||
# Media files, Zip files
|
||||
*.wav
|
||||
*.m4a
|
||||
*.mov
|
||||
*.zip
|
1
chatbot/.python-version
Normal file
1
chatbot/.python-version
Normal file
|
@ -0,0 +1 @@
|
|||
3.8
|
80
chatbot/Readme.md
Normal file
80
chatbot/Readme.md
Normal file
|
@ -0,0 +1,80 @@
|
|||
# Setup and run
|
||||
|
||||
Within the `chat` directory:
|
||||
|
||||
pyenv local 3.8
|
||||
python -m venv venv
|
||||
source venv/bin/activate
|
||||
pip install -r requirements.txt
|
||||
|
||||
Install ESpeak, see below
|
||||
|
||||
**Set config:**
|
||||
|
||||
In the `chat` folder, copy the file `config-sample.json` to a new file named `config.json` and set at least the OpenAI API key;
|
||||
the other values can be kept as default.
|
||||
|
||||
See section "Recording Audio" for getting correct Device ID on Raspberry. You can execute `record_audio.py` to see a list of available devices with IDs.
|
||||
|
||||
**Run:**
|
||||
|
||||
Execute `main.py` e.g.
|
||||
|
||||
python -m main
|
||||
|
||||
# Recording Audio
|
||||
|
||||
## Python package PyAudio
|
||||
|
||||
Installed via pip
|
||||
|
||||
For more examples, e.g. conversion into MP3 see
|
||||
https://realpython.com/playing-and-recording-sound-python/#recording-audio
|
||||
|
||||
## Raspberry Pi
|
||||
|
||||
Setup microphone and Device Id:
|
||||
|
||||
https://makersportal.com/blog/2018/8/23/recording-audio-on-the-raspberry-pi-with-python-and-a-usb-microphone
|
||||
|
||||
|
||||
# Speech Synthesis
|
||||
|
||||
## Raspberry/Linux: Pico TTS
|
||||
|
||||
https://github.com/cyberbotics/picotts
|
||||
|
||||
Pico TTS for Raspberry:
|
||||
https://cstan.io/post/2020/03/pico-tts-unter-raspbian-uebersetzen/
|
||||
|
||||
|
||||
## Mac
|
||||
|
||||
### ESpeak
|
||||
See https://espeak.sourceforge.net/
|
||||
|
||||
Possible improvement: MBROLA voices for ESpeak (https://github.com/numediart/MBROLA)
|
||||
|
||||
|
||||
Install
|
||||
|
||||
brew install espeak
|
||||
|
||||
Test
|
||||
|
||||
espeak -vde -p20 -k20 "Hallo Welt"
|
||||
|
||||
MBROLA for Mac: https://github.com/pettarin/setup-festival-mbrola
|
||||
|
||||
### Maybe Pico TTS could run on Mac
|
||||
https://github.com/cyberbotics/webots/wiki/Pico-Compilation
|
||||
|
||||
|
||||
# Audio for Raspberry
|
||||
|
||||
https://naomiproject.github.io/docs/
|
||||
|
||||
|
||||
# Similar projects
|
||||
|
||||
https://github.com/anatolybazarov/oracle.sh
|
25
chatbot/chat/chatgpt.py
Normal file
25
chatbot/chat/chatgpt.py
Normal file
|
@ -0,0 +1,25 @@
|
|||
import os
|
||||
import openai
|
||||
|
||||
def chat(message, openai_key, shortAnswer=True):
    """Ask ChatGPT (gpt-3.5-turbo) a single question and return its answer text.

    message: the user's question/prompt.
    openai_key: OpenAI API key, installed module-wide via openai.api_key.
    shortAnswer: when True, appends a German request for a brief answer.
    Returns the content of the first choice of the completion.
    """
    print("###################")
    openai.api_key = openai_key

    # Optionally ask for brevity (German: "a short answer please!").
    prompt = message + " - eine kurze Antwort bitte!" if shortAnswer else message

    print(f"Asking ChatGPT for an answer to: \n {prompt} \n ...")
    completion = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": prompt}],
    )

    answer_text = completion.choices[0].message.content
    print(f"Got answer from ChatGPT:\n {answer_text}")
    return answer_text
|
||||
|
||||
if __name__ == "__main__":
    # Manual test. NOTE: the original called chat("...") without the required
    # openai_key argument, which raised TypeError. Load the key from
    # config.json, consistent with speech_recognition.py's __main__ block.
    import json

    dirname = os.path.dirname(__file__)
    with open(os.path.join(dirname, "config.json")) as config_file:
        config = json.load(config_file)
    answer = chat("Erzähle mir was über Murnau", config["OPENAI_KEY"])
    print(answer)
|
||||
|
||||
|
6
chatbot/chat/config-sample.json
Normal file
6
chatbot/chat/config-sample.json
Normal file
|
@ -0,0 +1,6 @@
|
|||
{
|
||||
"OPENAI_KEY": "xxxxxxxxx",
|
||||
"MIC_DEVICE_ID": 0,
|
||||
"SECONDS_RECORDING": 5,
|
||||
"ESPEAK_VOICE": "de+f4"
|
||||
}
|
21
chatbot/chat/main.py
Normal file
21
chatbot/chat/main.py
Normal file
|
@ -0,0 +1,21 @@
|
|||
import os
|
||||
import json
|
||||
|
||||
import speech_recognition
|
||||
import speech_synthesis
|
||||
import chatgpt
|
||||
import record_audio
|
||||
|
||||
if __name__ == "__main__":
    # Voice-assistant pipeline: record microphone audio, transcribe it with
    # OpenAI Whisper, ask ChatGPT for an answer, then speak the answer aloud.
    dirname = os.path.dirname(__file__)
    # config.json is git-ignored; copy config-sample.json and fill in the key.
    with open(os.path.join(dirname, "config.json")) as config_file:
        config = json.load(config_file)
    openai_key=config['OPENAI_KEY']
    # Writes recorded.wav into this directory; device id and duration come
    # from the config (see Readme for finding the Raspberry Pi device id).
    record_audio.record(dirname,
                        device_id=config["MIC_DEVICE_ID"],
                        max_recording_time_s=config["SECONDS_RECORDING"])
    transcribed = speech_recognition.transcribe(dirname, openai_key)
    answer = chatgpt.chat(transcribed, openai_key)
    # Idea: recognize the spoken language and pass it to the speech
    # synthesizer. Unfortunately the detected language is currently not
    # returned by the OpenAI API.
    speech_synthesis.speak(answer, voice=config["ESPEAK_VOICE"])
|
||||
|
54
chatbot/chat/record_audio.py
Normal file
54
chatbot/chat/record_audio.py
Normal file
|
@ -0,0 +1,54 @@
|
|||
import os
|
||||
|
||||
import pyaudio
|
||||
import wave
|
||||
|
||||
def record(dirname, device_id=0, max_recording_time_s=3):
    """Record mono 16-bit audio from a microphone and save it as recorded.wav.

    dirname: directory in which recorded.wav is written.
    device_id: PyAudio input device index (see get_mics()).
    max_recording_time_s: recording duration in seconds.
    Returns the path of the written WAV file.

    Fix over the original: the PyAudio stream/instance are now released in
    try/finally even if stream.read raises (e.g. input overflow), and the
    wave file is written via a context manager, so no handles leak.
    """
    print("###################")
    fname = os.path.join(dirname, "recorded.wav")

    form_1 = pyaudio.paInt16          # 16-bit resolution
    chans = 1                         # 1 channel (mono)
    samp_rate = 44100                 # 44.1 kHz sampling rate
    chunk = 4096                      # 2^12 samples per buffer
    record_secs = max_recording_time_s

    audio = pyaudio.PyAudio()
    try:
        stream = audio.open(format=form_1, rate=samp_rate, channels=chans,
                            input_device_index=device_id, input=True,
                            frames_per_buffer=chunk)
        try:
            print(f"Recording via microphone for {max_recording_time_s} seconds")
            frames = []
            # Read whole chunks until the requested duration is covered.
            for _ in range(int((samp_rate / chunk) * record_secs)):
                frames.append(stream.read(chunk))
            print("Finished recording")
        finally:
            stream.stop_stream()
            stream.close()
        # Query the sample width before terminating the PyAudio instance.
        sample_width = audio.get_sample_size(form_1)
    finally:
        audio.terminate()

    # Save the captured frames as a .wav file.
    with wave.open(fname, 'wb') as wavefile:
        wavefile.setnchannels(chans)
        wavefile.setsampwidth(sample_width)
        wavefile.setframerate(samp_rate)
        wavefile.writeframes(b''.join(frames))
    return fname
|
||||
|
||||
def get_mics():
    """List every audio device PyAudio knows, printing "<index>: <name>".

    Use the printed index as MIC_DEVICE_ID in config.json.
    """
    pa = pyaudio.PyAudio()
    for index in range(pa.get_device_count()):
        info = pa.get_device_info_by_index(index)
        print(f"{index}: {info.get('name')}")
|
||||
|
||||
if __name__ == "__main__":
    # Manual test: list available input devices, then record 5 seconds
    # from device 0 into this script's directory.
    get_mics()
    base_dir = os.path.dirname(__file__)
    record(base_dir, device_id=0, max_recording_time_s=5)
|
25
chatbot/chat/speech_recognition.py
Normal file
25
chatbot/chat/speech_recognition.py
Normal file
|
@ -0,0 +1,25 @@
|
|||
import os
|
||||
import json
|
||||
|
||||
import openai
|
||||
|
||||
def transcribe(dirname, openai_key, file = "recorded.wav"):
    """Transcribe an audio file with OpenAI Whisper and return the text.

    dirname: directory containing the audio file.
    openai_key: OpenAI API key, installed module-wide via openai.api_key.
    file: audio file name within dirname (default: recorded.wav).
    Returns the recognized text string.

    Fix over the original: the audio file was opened and never closed
    (handle leak); it is now opened in a with-block.
    """
    print("###################")
    openai.api_key = openai_key
    fname = os.path.join(dirname, file)
    print("Transcribing audio via OpenAI Whisper ...")
    with open(fname, "rb") as audio_file:
        transcript = openai.Audio.transcribe("whisper-1", audio_file)
    recognized_text = transcript.text
    print(f"Recognized text: \n > {recognized_text}")
    return recognized_text
|
||||
|
||||
if __name__ == "__main__":
    # Manual test: transcribe a local test recording using the key from
    # config.json and print the result.
    base_dir = os.path.dirname(__file__)
    with open(os.path.join(base_dir, "config.json")) as cfg_file:
        settings = json.load(cfg_file)
    result = transcribe(base_dir, settings['OPENAI_KEY'], file="test.m4a")
    print(result)
|
||||
|
||||
|
14
chatbot/chat/speech_synthesis.py
Normal file
14
chatbot/chat/speech_synthesis.py
Normal file
|
@ -0,0 +1,14 @@
|
|||
import subprocess
|
||||
|
||||
def speak(text, voice="de+f4"):
    """Speak *text* aloud via the espeak command-line tool.

    text: the sentence to synthesize (here: a ChatGPT answer).
    voice: espeak voice spec, e.g. "de+f4" = German, female variant 4.

    SECURITY fix over the original: the text (LLM-generated, effectively
    untrusted) was interpolated into a shell=True command string, allowing
    shell injection via quotes/backticks/$(). Pass an argv list instead so
    no shell ever parses the text. -p20 = pitch, -s150 = speed (wpm).
    """
    print("###################")
    print("Generating audio from text")
    subprocess.call(["espeak", f"-v{voice}", "-p20", "-s150", text])
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Manual test: speak a sample German sentence about Murnau using the
    # "klatt2" German voice variant. Note the backslash continues the string
    # literal across two source lines.
    speak("Murnau ist eine kleine Stadt in Bayern, Deutschland, bekannt für seine atemberaubende Natur, \
insbesondere den Murnauer Moos Nationalpark, sowie für seine malerische Architektur und deutsche Kultur.",
          voice="de+klatt2")
|
3
chatbot/requirements.txt
Normal file
3
chatbot/requirements.txt
Normal file
|
@ -0,0 +1,3 @@
|
|||
openai
|
||||
pyaudio
|
||||
|
Loading…
Reference in a new issue