"""Local HTTP backend for LoquendoBot: speech-to-text streaming and text-to-speech.

Recovered from the git patch "added backend code"
(commit 34729f2a779c17718b4f025685ec8e15b4fbe0d7, Khyretos,
Sun, 24 Dec 2023 15:52:15 +0100), which created
``src/backend/loquendoBot_backend.py``.

The same patch also changed ``.gitignore``, replacing the ``backend/`` entry
with::

    backend/*
    !backend/loquendoBot_backend.py

Endpoints exposed by the Flask app:

* ``GET  /stream``    - server-sent events with Vosk recognition results
* ``POST /stop``      - ask the recognition loop to finish
* ``GET  /terminate`` - shut the process down
* ``GET  /status``    - liveness probe
* ``POST /audio``     - render a message to an audio file with pyttsx3
* ``GET  /voices``    - list the installed pyttsx3 voice names
"""

import gevent
import gevent.monkey

# Patch the standard library before anything else is imported so that every
# later import sees the cooperative versions.
gevent.monkey.patch_all()

import configparser
import json
import os
import queue
import sys

import emoji
import fasttext
import gevent.queue
import pyttsx3
import sounddevice as sd
from deep_translator import (
    MyMemoryTranslator,
)
from flask import Flask, Response, jsonify, request
from vosk import KaldiRecognizer, Model, SetLogLevel

# global variables

SetLogLevel(-1)  # turn off Vosk's debug log output

settings = configparser.ConfigParser()
app = Flask(__name__)


settingsPath = ""
environment = ""
q = queue.Queue()

# Default model locations (developer machine paths kept for compatibility;
# pass explicit paths to the classes below to override them).
DEFAULT_LANG_MODEL_PATH = (
    r"C:\repos\LoquendoBotV2\language_detection_model\lid.176.bin"
)
DEFAULT_STT_MODEL_PATH = (
    r"C:\repos\LoquendoBotV2\speech_to_text_models\vosk-model-small-es-0.42"
)

# How long the recognition loop waits for audio before re-checking whether it
# has been asked to stop.
_QUEUE_POLL_SECONDS = 0.5


# global functions

# classes


class LanguageDetection:
    """Guess the language of a text with a fastText language-identification model."""

    def __init__(self, model_path=DEFAULT_LANG_MODEL_PATH):
        """Load the fastText model stored at *model_path*."""
        self.model = fasttext.load_model(model_path)

    def predict_lang(self, text, k=5):
        """Return the *k* most likely language codes for *text*, best first.

        The ``__label__`` prefix fastText puts on every label is removed.
        """
        # fastText's predict() refuses input containing a newline, so flatten
        # multi-line text into a single line first.
        single_line = text.replace("\n", " ")
        labels = self.model.predict(single_line, k=k)[0]
        return [label.replace("__label__", "") for label in labels]


class STT:
    """Streaming speech-to-text using a Vosk model and a sounddevice input stream."""

    samplerate = None
    args = ""
    remaining = ""

    def __init__(self, device=0, model_path=DEFAULT_STT_MODEL_PATH):
        """Query the input device and load the Vosk model.

        Args:
            device: sounddevice input device index (0, as in the original code).
            model_path: directory of the Vosk model to load.
        """
        self.device = device
        device_info = sd.query_devices(device, "input")
        self.samplerate = int(device_info["default_samplerate"])
        self.model = Model(model_path)
        self.dump_fn = None

        self.q = gevent.queue.Queue()
        self.rec = None
        self.is_running = False

    def callback(self, indata, frames, time, status):
        """Audio callback: report stream status and enqueue the raw block."""
        if status:
            print(status, file=sys.stderr)
        self.q.put(bytes(indata))

    def start_recognition(self):
        """Yield server-sent-event lines with final and partial results.

        The generator runs until ``stop_recognition`` clears ``is_running``
        (or the consumer closes it); leaving the ``with`` block closes the
        audio stream.
        """
        self.is_running = True

        with sd.RawInputStream(
            samplerate=self.samplerate,
            blocksize=8000,
            device=self.device,
            dtype="int16",
            channels=1,
            callback=self.callback,
        ):
            self.rec = KaldiRecognizer(self.model, self.samplerate)
            while self.is_running:
                try:
                    # Timed wait so a stop request is noticed even when no
                    # audio is arriving.
                    data = self.q.get(timeout=_QUEUE_POLL_SECONDS)
                except gevent.queue.Empty:
                    continue

                if self.rec.AcceptWaveform(data):
                    raw_result = self.rec.Result()
                else:
                    raw_result = self.rec.PartialResult()

                result_json = json.loads(str(raw_result))
                # NOTE(review): this sends the str() of a Python dict
                # (single-quoted), not strict JSON. Kept unchanged because the
                # client-side parsing is not visible here - confirm before
                # switching to json.dumps().
                yield f"data: {result_json}\n\n"

    def stop_recognition(self):
        """Ask a running ``start_recognition`` loop to finish."""
        self.is_running = False


speech_recognition_service = STT()


class TTS:
    """Text-to-speech rendering through a pyttsx3 engine."""

    engine = None
    rate = None

    def __init__(self):
        """Create the pyttsx3 engine."""
        self.engine = pyttsx3.init()

    def say(self, message, voice, count):
        """Render *message* with the voice named *voice* into an audio file.

        The file is written to ``sounds/tts/Internal_<count>.mp3`` below the
        folder derived from the module-level ``settingsPath``: its parent
        folder's parent when ``environment`` is ``"dev"``, otherwise its
        parent folder.

        Raises:
            ValueError: if no installed voice is named *voice*.
        """
        matching_id = next(
            (
                item.id
                for item in self.engine.getProperty("voices")
                if item.name == voice
            ),
            None,
        )
        if matching_id is None:
            raise ValueError(f"Unknown TTS voice: {voice!r}")
        self.engine.setProperty("voice", matching_id)

        base_folder = os.path.dirname(settingsPath)
        if environment == "dev":
            # In development the settings file sits one level deeper.
            base_folder = os.path.dirname(base_folder)

        # NOTE(review): `count` comes straight from the HTTP request and ends
        # up in a file name - confirm callers only ever send a number.
        saveLocation = os.path.join(
            base_folder, "sounds", "tts", f"Internal_{count}.mp3"
        )

        self.engine.save_to_file(message, saveLocation)
        self.engine.runAndWait()

    def voices(self):
        """Return the names of all voices known to the engine."""
        voices = self.engine.getProperty("voices")
        self.engine.say(
            ""
        )  # engine breaks if you do not say something after getting voices
        self.engine.runAndWait()

        return [voice.name for voice in voices]


text_to_speech_service = TTS()

# endpoints


@app.route("/stream", methods=["GET"])
def stream_recognition():
    """Stream recognition results to the client as server-sent events."""
    return Response(
        speech_recognition_service.start_recognition(),
        content_type="text/event-stream",
    )


@app.route("/stop", methods=["POST"])
def stop_recording():
    """Stop the running speech recognition loop."""
    speech_recognition_service.stop_recognition()
    return Response("Speech recognition stopped", status=200)


# TODO: a before_request hook that printed a "development environment" warning
# when environment == "dev" was sketched here but left disabled.


@app.route("/terminate", methods=["GET"])
def terminate_processes():
    """Shut the server down and exit the process."""
    try:
        shutdown_server()
    except RuntimeError:
        # The Werkzeug hook is absent on recent Werkzeug versions and on other
        # WSGI servers; the hard exit below still terminates the process.
        app.logger.info("No server shutdown hook available; exiting directly")
    os._exit(0)


def shutdown_server():
    """Call the Werkzeug development-server shutdown hook.

    Must be called while handling a request.

    Raises:
        RuntimeError: if the WSGI environment provides no shutdown hook.
    """
    func = request.environ.get("werkzeug.server.shutdown")
    if func is None:
        raise RuntimeError("Server is not running")
    func()


# TODO: a POST /detect language-detection endpoint was sketched here but left
# disabled; it referenced a `detector` object that is not defined in this file.


@app.route("/status", methods=["GET"])
def server_status():
    """Liveness probe."""
    return jsonify({"status": "server is running"})


@app.route("/audio", methods=["POST"])
def trigger_backend_event():
    """Render the posted ``message`` with the posted ``voice`` to an audio file.

    Expects a JSON body with ``message``, ``voice`` and ``count`` keys.
    """
    try:
        request_data = request.json
        message = request_data.get("message", "")
        voice = request_data.get("voice")
        count = request_data.get("count")
        text_to_speech_service.say(message, voice, count)
    except Exception:
        # Top-level request boundary: log the cause, keep the generic reply.
        app.logger.exception("Failed to handle /audio request")
        return jsonify({"error": "An error occurred"}), 500
    return jsonify({"message": "Audio triggered"}), 200


@app.route("/voices", methods=["GET"])
def get_voices():
    """Return the list of available TTS voice names."""
    try:
        voices = text_to_speech_service.voices()
        return jsonify({"voices": voices}), 200
    except Exception:
        # Top-level request boundary: log the cause, keep the generic reply.
        app.logger.exception("Failed to list TTS voices")
        return jsonify({"error": "An error occurred"}), 500


def _startup_self_test():
    """Smoke-test language detection and translation, printing both results."""
    LANGUAGE = LanguageDetection()
    lang = LANGUAGE.predict_lang("hola")
    print(lang)
    text = "Keep it up. You are awesome"
    translated = MyMemoryTranslator(
        source="english", target="spanish latin america"
    ).translate(text)
    print(translated)


def main():
    """Read the command line, then run the Flask server on localhost.

    Usage: ``loquendoBot_backend.py [SETTINGS_INI [ENVIRONMENT]]``.
    Without arguments the server runs in the ``dev`` environment on port 9000;
    otherwise the port is read from ``[GENERAL] PORT`` in the settings file.
    """
    global environment, settingsPath

    try:
        _startup_self_test()
    except Exception:
        # Nothing the server does depends on this check (it needs the
        # language model on disk and network access), so do not let it block
        # start-up.
        app.logger.exception("Startup self-test failed; continuing")

    if len(sys.argv) > 1:
        settingsPath = os.path.normpath(sys.argv[1])
        if len(sys.argv) > 2:
            environment = sys.argv[2]
        settings.read(settingsPath)
        port = int(settings["GENERAL"]["PORT"])
    else:
        environment = "dev"
        port = 9000

    app.run(host="127.0.0.1", port=port)


if __name__ == "__main__":
    main()