added backend code
This commit is contained in:
parent
0b0af74bf1
commit
34729f2a77
2 changed files with 256 additions and 1 deletions
3
.gitignore
vendored
3
.gitignore
vendored
|
|
@ -96,7 +96,8 @@ src/config/settings.ini
|
|||
speech_to_text_models/*
|
||||
!speech_to_text_models/Where to get STT models.txt
|
||||
build/
|
||||
backend/
|
||||
backend/*
|
||||
!backend/loquendoBot_backend.py
|
||||
language_detection_model/*
|
||||
!language_detection_model/Where to get language detection model.txt
|
||||
.vscode/
|
||||
|
|
|
|||
254
src/backend/loquendoBot_backend.py
Normal file
254
src/backend/loquendoBot_backend.py
Normal file
|
|
@ -0,0 +1,254 @@
|
|||
from flask import Flask, Response, jsonify, request
|
||||
import gevent
|
||||
import gevent.monkey
|
||||
import json
|
||||
|
||||
gevent.monkey.patch_all()
|
||||
import gevent.queue
|
||||
|
||||
import configparser
|
||||
import pyttsx3
|
||||
import sys
|
||||
import os
|
||||
|
||||
import queue
|
||||
import sys
|
||||
import sounddevice as sd
|
||||
|
||||
import fasttext
|
||||
|
||||
from deep_translator import (
|
||||
MyMemoryTranslator,
|
||||
)
|
||||
|
||||
import emoji
|
||||
|
||||
from vosk import Model, KaldiRecognizer, SetLogLevel
|
||||
|
||||
# global variables

# Vosk log level -1 (presumably silences the recognizer's console logging).
SetLogLevel(-1)

# Settings parser; filled by settings.read() in the __main__ block.
settings = configparser.ConfigParser()
app = Flask(__name__)

# Both values are overwritten in the __main__ block (from sys.argv or dev defaults).
settingsPath = ""
environment = ""
# NOTE(review): not referenced elsewhere in the visible code; STT creates its own gevent queue.
q = queue.Queue()
|
||||
|
||||
|
||||
# global functions
|
||||
|
||||
# classes
|
||||
|
||||
|
||||
class LanguageDetection:
    """Detect the language of a text with a pretrained fastText model."""

    # Used when no explicit path is supplied (the original hard-coded location).
    DEFAULT_MODEL_PATH = r"C:\repos\LoquendoBotV2\language_detection_model\lid.176.bin"

    def __init__(self, model_path=None):
        """Load the language-identification model.

        Args:
            model_path: Path to the fastText model file. When omitted,
                ``DEFAULT_MODEL_PATH`` is used.
        """
        if model_path is None:
            model_path = self.DEFAULT_MODEL_PATH
        self.model = fasttext.load_model(model_path)

    def predict_lang(self, text, k=5):
        """Return the language codes of the best matches for ``text``.

        Args:
            text: The text to classify.
            k: Number of candidate languages requested from the model.

        Returns:
            A list of language codes with the ``__label__`` marker removed,
            in the order the model returned them.
        """
        # fastText's predict() handles one line at a time and raises on
        # embedded newlines, so flatten multi-line input first.
        single_line = text.replace("\n", " ")
        predictions = self.model.predict(single_line, k=k)
        return [label.replace("__label__", "") for label in predictions[0]]
|
||||
|
||||
|
||||
class STT:
    """Feed microphone audio to a Vosk recognizer and yield results as SSE lines."""

    samplerate = None
    args = ""
    remaining = ""

    # Used when no explicit path is supplied (the original hard-coded location).
    DEFAULT_MODEL_PATH = (
        r"C:\repos\LoquendoBotV2\speech_to_text_models\vosk-model-small-es-0.42"
    )
    # Seconds to wait for audio before re-checking the stop flag.
    POLL_INTERVAL = 0.5

    def __init__(self, model_path=None, device=0):
        """Query the input device and load the Vosk model.

        Args:
            model_path: Directory of the Vosk model. When omitted,
                ``DEFAULT_MODEL_PATH`` is used.
            device: sounddevice input device passed to ``query_devices`` and
                ``RawInputStream``.
        """
        self.device = device
        device_info = sd.query_devices(self.device, "input")
        self.samplerate = int(device_info["default_samplerate"])
        self.model = Model(
            self.DEFAULT_MODEL_PATH if model_path is None else model_path
        )
        self.dump_fn = None

        # Audio blocks handed over by callback() and consumed by start_recognition().
        self.q = gevent.queue.Queue()
        self.rec = None
        self.is_running = False

    def callback(self, indata, frames, time, status):
        """Receive one audio block from the input stream and queue it.

        The signature is the one sounddevice calls; ``frames`` and ``time``
        are unused. A truthy ``status`` is printed to stderr.
        """
        if status:
            print(status, file=sys.stderr)
        self.q.put(bytes(indata))

    def start_recognition(self):
        """Yield recognition results until ``stop_recognition()`` is called.

        Yields:
            Strings of the form ``"data: <result>\\n\\n"``, one per audio
            block, carrying either a final or a partial recognizer result.
        """
        self.is_running = True

        with sd.RawInputStream(
            samplerate=self.samplerate,
            blocksize=8000,
            device=self.device,
            dtype="int16",
            channels=1,
            callback=self.callback,
        ):
            self.rec = KaldiRecognizer(self.model, self.samplerate)
            # Poll with a timeout so the loop notices the stop flag even when
            # no audio arrives; a plain blocking get() would never re-check it.
            while self.is_running:
                try:
                    data = self.q.get(timeout=self.POLL_INTERVAL)
                except gevent.queue.Empty:
                    continue

                if self.rec.AcceptWaveform(data):
                    raw = self.rec.Result()
                else:
                    raw = self.rec.PartialResult()

                # NOTE(review): the payload is the str() of the parsed dict,
                # not JSON text; kept unchanged so existing clients still work.
                result_json = json.loads(str(raw))
                yield f"data: {result_json}\n\n"

    def stop_recognition(self):
        """Clear the running flag so ``start_recognition()`` leaves its loop."""
        self.is_running = False
|
||||
|
||||
|
||||
speech_recognition_service = STT()  # shared instance used by the /stream and /stop endpoints
|
||||
|
||||
|
||||
class TTS:
    """Wrapper around a pyttsx3 engine that renders messages to sound files."""

    engine = None
    rate = None

    def __init__(self):
        """Create the pyttsx3 engine."""
        self.engine = pyttsx3.init()

    def say(self, message, voice, count):
        """Render ``message`` with the named voice into ``Internal_<count>.mp3``.

        The file is written to ``sounds/tts`` under the folder containing the
        settings file, or one folder higher when ``environment`` is ``"dev"``.

        Args:
            message: Text to synthesize.
            voice: Name of an installed voice, as returned by ``voices()``.
            count: Value used in the output file name.

        Raises:
            ValueError: If no installed voice has the given name.
        """
        voice_id = next(
            (
                item.id
                for item in self.engine.getProperty("voices")
                if item.name == voice
            ),
            None,
        )
        if voice_id is None:
            # Previously this fell through to an UnboundLocalError.
            raise ValueError(f"Unknown TTS voice: {voice!r}")
        self.engine.setProperty("voice", voice_id)

        base_folder = os.path.dirname(settingsPath)
        if environment == "dev":
            # In dev the sounds folder sits one level above the settings folder.
            base_folder = os.path.dirname(base_folder)
        save_location = os.path.join(
            base_folder, "sounds", "tts", f"Internal_{count}.mp3"
        )

        self.engine.save_to_file(message, save_location)
        self.engine.runAndWait()

    def voices(self):
        """Return the names of the voices known to the engine."""
        installed = self.engine.getProperty("voices")
        # engine breaks if you do not say something after getting voices
        self.engine.say("")
        self.engine.runAndWait()

        return [voice.name for voice in installed]
|
||||
|
||||
|
||||
text_to_speech_service = TTS()  # shared instance used by the /audio and /voices endpoints
|
||||
|
||||
# endpoints
|
||||
|
||||
|
||||
@app.route("/stream", methods=["GET"])
def stream_recognition():
    """Start speech recognition and stream its output as an event stream."""
    # start_recognition() is a generator; Flask consumes it lazily while
    # the response is being sent.
    event_source = speech_recognition_service.start_recognition()
    return Response(event_source, content_type="text/event-stream")
|
||||
|
||||
|
||||
@app.route("/stop", methods=["POST"])
def stop_recording():
    """Ask the speech recognition service to stop and acknowledge with 200."""
    speech_recognition_service.stop_recognition()
    acknowledgement = "Speech recognition stopped"
    return Response(acknowledgement, status=200)
|
||||
|
||||
|
||||
# @app.before_request
|
||||
# def custom_warning():
|
||||
# if environment == "dev":
|
||||
# print(
|
||||
# # "Running in internal development environment. This server is not for production use."
|
||||
# )
|
||||
|
||||
|
||||
@app.route("/terminate", methods=["GET"])
def terminate_processes():
    """Shut the backend down and end the process.

    The process is ended with ``os._exit(0)``, so no HTTP response is sent.
    """
    try:
        shutdown_server()
    except RuntimeError:
        # shutdown_server() raises when the WSGI server exposes no shutdown
        # hook. That must not stop termination, so fall through to the
        # hard exit below.
        pass
    os._exit(0)
|
||||
|
||||
|
||||
def shutdown_server():
    """Invoke the WSGI server's shutdown hook for the current request.

    Raises:
        RuntimeError: If the request environ carries no shutdown hook.
    """
    # The Werkzeug development server published its hook under this key.
    # The previous key ("sever shutdown") was a typo and never matched.
    # NOTE(review): recent Werkzeug releases no longer provide the hook, in
    # which case this still raises — confirm against the installed version.
    func = request.environ.get("werkzeug.server.shutdown")
    if func is None:
        raise RuntimeError("Server is not running")
    func()
|
||||
|
||||
|
||||
# @app.route("/detect", methods=["POST"])
|
||||
# def server_status():
|
||||
# try:
|
||||
# request_data = request.json
|
||||
# message = request_data.get("message", "")
|
||||
# confidence_values = detector.compute_language_confidence_values(message)
|
||||
# for language, value in confidence_values:
|
||||
# print(f"{language.name}: {value:.2f}")
|
||||
# message = request_data.get("message", "")
|
||||
# except Exception as e:
|
||||
# return jsonify({"error": "An error occurred"}), 500
|
||||
# return jsonify({"message": "Audio triggered"}), 200
|
||||
|
||||
|
||||
@app.route("/status", methods=["GET"])
def server_status():
    """Report that the backend is reachable."""
    payload = {"status": "server is running"}
    return jsonify(payload)
|
||||
|
||||
|
||||
@app.route("/audio", methods=["POST"])
def trigger_backend_event():
    """Synthesize the posted message with the requested voice.

    Reads ``message``, ``voice`` and ``count`` from the JSON body and passes
    them to the text-to-speech service.

    Returns:
        A JSON confirmation with status 200, or a JSON error with status 500
        when anything in the request handling or synthesis fails.
    """
    try:
        request_data = request.json
        message = request_data.get("message", "")
        voice = request_data.get("voice")
        count = request_data.get("count")
        text_to_speech_service.say(message, voice, count)
    except Exception:
        # Record the cause instead of discarding it; the client still gets
        # the same generic error body.
        app.logger.exception("Failed to handle /audio request")
        return jsonify({"error": "An error occurred"}), 500
    return jsonify({"message": "Audio triggered"}), 200
|
||||
|
||||
|
||||
@app.route("/voices", methods=["GET"])
def get_voices():
    """Return the names of the available text-to-speech voices.

    Returns:
        ``{"voices": [...]}`` with status 200, or a JSON error with status
        500 when the voices cannot be retrieved.
    """
    try:
        voices = text_to_speech_service.voices()
    except Exception:
        # Record the cause instead of discarding it; the client still gets
        # the same generic error body.
        app.logger.exception("Failed to list TTS voices")
        return jsonify({"error": "An error occurred"}), 500
    return jsonify({"voices": voices}), 200
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Startup smoke test for language detection and translation. It needs the
    # local model file and network access, so a failure is reported on stderr
    # rather than preventing the server from starting.
    try:
        LANGUAGE = LanguageDetection()
        lang = LANGUAGE.predict_lang("hola")
        print(lang)
        text = "Keep it up. You are awesome"
        translated = MyMemoryTranslator(
            source="english", target="spanish latin america"
        ).translate(text)
        print(translated)
    except Exception as error:
        print(f"Startup smoke test failed: {error}", file=sys.stderr)

    # Expected invocation: <script> <settings path> <environment>.
    # Both arguments are required; checking only for one used to raise
    # IndexError on sys.argv[2].
    if len(sys.argv) > 2:
        environment = sys.argv[2]
        settingsPath = os.path.normpath(sys.argv[1])
        settings.read(settingsPath)
        port = int(settings["GENERAL"]["PORT"])
    else:
        # No (complete) arguments: run with development defaults.
        # The former stream_recognition() call here only built an unused
        # Response outside any request, so it was dropped.
        environment = "dev"
        port = 9000

    app.run(host="127.0.0.1", port=port)
|
||||
Loading…
Reference in a new issue