added backend code

Khyretos 2023-12-24 15:52:15 +01:00
parent 0b0af74bf1
commit 34729f2a77
2 changed files with 256 additions and 1 deletion

.gitignore

@@ -96,7 +96,8 @@ src/config/settings.ini
 speech_to_text_models/*
 !speech_to_text_models/Where to get STT models.txt
 build/
-backend/
+backend/*
+!backend/loquendoBot_backend.py
 language_detection_model/*
 !language_detection_model/Where to get language detection model.txt
 .vscode/

backend/loquendoBot_backend.py

@@ -0,0 +1,254 @@
from flask import Flask, Response, jsonify, request
import gevent
import gevent.monkey
import json
gevent.monkey.patch_all()
import gevent.queue
import configparser
import pyttsx3
import sys
import os
import queue
import sounddevice as sd
import fasttext
from deep_translator import (
MyMemoryTranslator,
)
import emoji
from vosk import Model, KaldiRecognizer, SetLogLevel
# global variables
SetLogLevel(-1)
settings = configparser.ConfigParser()
app = Flask(__name__)
settingsPath = ""
environment = ""
q = queue.Queue()
# global functions
# classes
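# fastText language identification: loads the lid.176.bin model and returns the codes of the most likely languages for a given text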
class LanguageDetection:
def __init__(self):
pretrained_lang_model = (
r"C:\repos\LoquendoBotV2\language_detection_model\lid.176.bin"
)
self.model = fasttext.load_model(pretrained_lang_model)
def predict_lang(self, text):
        predictions = self.model.predict(text, k=5)  # returns the top 5 matching languages
language_codes = []
for prediction in predictions[0]:
language_codes.append(prediction.replace("__label__", ""))
return language_codes
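# Vosk speech-to-text: captures microphone audio through sounddevice and yields recognition results formatted as server-sent events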
class STT:
samplerate = None
args = ""
remaining = ""
def __init__(self):
device_info = sd.query_devices(0, "input")
self.samplerate = int(device_info["default_samplerate"])
self.model = Model(
r"C:\repos\LoquendoBotV2\speech_to_text_models\vosk-model-small-es-0.42"
)
self.dump_fn = None
self.q = gevent.queue.Queue()
self.rec = None
self.is_running = False
def callback(self, indata, frames, time, status):
if status:
print(status, file=sys.stderr)
self.q.put(bytes(indata))
def start_recognition(self):
self.is_running = True
with sd.RawInputStream(
samplerate=self.samplerate,
blocksize=8000,
device=0, # Default microphone
dtype="int16",
channels=1,
callback=self.callback,
):
self.rec = KaldiRecognizer(self.model, self.samplerate)
            while self.is_running:
                data = self.q.get()
                if self.rec.AcceptWaveform(data):
                    # Final result for the finished utterance
                    result_json = json.loads(self.rec.Result())
                    yield f"data: {json.dumps(result_json)}\n\n"
                else:
                    # Partial (in-progress) result
                    result_json = json.loads(self.rec.PartialResult())
                    yield f"data: {json.dumps(result_json)}\n\n"
def stop_recognition(self):
self.is_running = False
speech_recognition_service = STT()
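# pyttsx3 text-to-speech: selects the system voice by name and saves the rendered message to the sounds\tts folder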
class TTS:
engine = None
rate = None
def __init__(self):
self.engine = pyttsx3.init()
    def say(self, message, voice, count):
        voices = self.engine.getProperty("voices")
        matching_id = None
        for item in voices:
            if item.name == voice:
                matching_id = item.id
                break
        if matching_id is not None:
            self.engine.setProperty("voice", matching_id)
if environment == "dev":
settings_folder = os.path.dirname(settingsPath)
src_folder = os.path.dirname(settings_folder)
saveLocation = os.path.join(
src_folder, "sounds\\tts", f"Internal_{count}.mp3"
)
else:
resources_folder = os.path.dirname(settingsPath)
saveLocation = os.path.join(
resources_folder, "sounds\\tts", f"Internal_{count}.mp3"
)
self.engine.save_to_file(message, saveLocation)
self.engine.runAndWait()
def voices(self):
voices = self.engine.getProperty("voices")
self.engine.say(
""
) # engine breaks if you do not say something after getting voices
self.engine.runAndWait()
return [voice.name for voice in voices]
text_to_speech_service = TTS()
# endpoints
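# GET /stream: server-sent event stream of live speech recognition results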
@app.route("/stream", methods=["GET"])
def stream_recognition():
def generate():
return speech_recognition_service.start_recognition()
return Response(generate(), content_type="text/event-stream")
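# POST /stop: signal the recognition loop to stop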
@app.route("/stop", methods=["POST"])
def stop_recording():
speech_recognition_service.stop_recognition()
return Response("Speech recognition stopped", status=200)
# @app.before_request
# def custom_warning():
# if environment == "dev":
# print(
# # "Running in internal development environment. This server is not for production use."
# )
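# GET /terminate: shut down the development server and exit the process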
@app.route("/terminate", methods=["GET"])
def terminate_processes():
shutdown_server()
os._exit(0)
def shutdown_server():
    # Werkzeug exposes its shutdown hook through the WSGI environ
    func = request.environ.get("werkzeug.server.shutdown")
    if func is None:
        raise RuntimeError("Server is not running")
    func()
# @app.route("/detect", methods=["POST"])
# def server_status():
# try:
# request_data = request.json
# message = request_data.get("message", "")
# confidence_values = detector.compute_language_confidence_values(message)
# for language, value in confidence_values:
# print(f"{language.name}: {value:.2f}")
# message = request_data.get("message", "")
# except Exception as e:
# return jsonify({"error": "An error occurred"}), 500
# return jsonify({"message": "Audio triggered"}), 200
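# GET /status: simple health check used to confirm the backend is up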
@app.route("/status", methods=["GET"])
def server_status():
return jsonify({"status": "server is running"})
@app.route("/audio", methods=["POST"])
def trigger_backend_event():
try:
request_data = request.json
message = request_data.get("message", "")
voice = request_data.get("voice")
count = request_data.get("count")
text_to_speech_service.say(message, voice, count)
except Exception as e:
return jsonify({"error": "An error occurred"}), 500
return jsonify({"message": "Audio triggered"}), 200
@app.route("/voices", methods=["GET"])
def get_voices():
try:
voices = text_to_speech_service.voices()
return jsonify({"voices": voices}), 200
except Exception as e:
return jsonify({"error": "An error occurred"}), 500
if __name__ == "__main__":
LANGUAGE = LanguageDetection()
lang = LANGUAGE.predict_lang("hola")
print(lang)
text = "Keep it up. You are awesome"
translated = MyMemoryTranslator(
source="english", target="spanish latin america"
).translate(text)
print(translated)
    if len(sys.argv) > 2:
environment = sys.argv[2]
settingsPath = os.path.normpath(sys.argv[1])
settings.read(settingsPath)
port = int(settings["GENERAL"]["PORT"])
else:
environment = "dev"
port = 9000
stream_recognition()
app.run(host="127.0.0.1", port=port)