added backend code
This commit is contained in:
parent
0b0af74bf1
commit
34729f2a77
2 changed files with 256 additions and 1 deletions
3
.gitignore
vendored
3
.gitignore
vendored
|
|
@ -96,7 +96,8 @@ src/config/settings.ini
|
||||||
speech_to_text_models/*
|
speech_to_text_models/*
|
||||||
!speech_to_text_models/Where to get STT models.txt
|
!speech_to_text_models/Where to get STT models.txt
|
||||||
build/
|
build/
|
||||||
backend/
|
backend/*
|
||||||
|
!backend/loquendoBot_backend.py
|
||||||
language_detection_model/*
|
language_detection_model/*
|
||||||
!language_detection_model/Where to get language detection model.txt
|
!language_detection_model/Where to get language detection model.txt
|
||||||
.vscode/
|
.vscode/
|
||||||
|
|
|
||||||
254
src/backend/loquendoBot_backend.py
Normal file
254
src/backend/loquendoBot_backend.py
Normal file
|
|
@ -0,0 +1,254 @@
|
||||||
|
from flask import Flask, Response, jsonify, request
|
||||||
|
import gevent
|
||||||
|
import gevent.monkey
|
||||||
|
import json
|
||||||
|
|
||||||
|
gevent.monkey.patch_all()
|
||||||
|
import gevent.queue
|
||||||
|
|
||||||
|
import configparser
|
||||||
|
import pyttsx3
|
||||||
|
import sys
|
||||||
|
import os
|
||||||
|
|
||||||
|
import queue
|
||||||
|
import sys
|
||||||
|
import sounddevice as sd
|
||||||
|
|
||||||
|
import fasttext
|
||||||
|
|
||||||
|
from deep_translator import (
|
||||||
|
MyMemoryTranslator,
|
||||||
|
)
|
||||||
|
|
||||||
|
import emoji
|
||||||
|
|
||||||
|
from vosk import Model, KaldiRecognizer, SetLogLevel
|
||||||
|
|
||||||
|
# global variables
|
||||||
|
|
||||||
|
# --- shared module state -------------------------------------------------

# Flask application that the route decorators in this file register against.
app = Flask(__name__)

# Parser for the settings file; it is filled by the start-up code when a
# settings path is passed on the command line.
settings = configparser.ConfigParser()

# Run-time configuration, assigned by the start-up code.
settingsPath = ""
environment = ""

# Module-level queue; not referenced by the code visible in this file.
q = queue.Queue()

# Turn down Vosk's log output.
SetLogLevel(-1)
|
||||||
|
|
||||||
|
|
||||||
|
# global functions
|
||||||
|
|
||||||
|
# classes
|
||||||
|
|
||||||
|
|
||||||
|
class LanguageDetection:
    """Identify the language of a text with a pretrained fastText model."""

    # Model file loaded when no explicit path is given to the constructor.
    DEFAULT_MODEL_PATH = (
        r"C:\repos\LoquendoBotV2\language_detection_model\lid.176.bin"
    )

    def __init__(self, model_path=None):
        """Load the language-identification model.

        Args:
            model_path: Path of the fastText model file. When omitted,
                ``DEFAULT_MODEL_PATH`` is used.
        """
        self.model = fasttext.load_model(model_path or self.DEFAULT_MODEL_PATH)

    def predict_lang(self, text, k=5):
        """Return the ``k`` best-matching language codes for ``text``.

        Args:
            text: Text whose language should be detected.
            k: Number of candidate languages to request from the model.

        Returns:
            A list of language codes (the model's labels with the
            ``__label__`` marker removed), in the order the model returns them.
        """
        # fastText's predict() handles one line at a time and rejects input
        # containing a newline, so fold multi-line text into a single line.
        single_line = text.replace("\n", " ")
        predictions = self.model.predict(single_line, k=k)
        return [label.replace("__label__", "") for label in predictions[0]]
|
||||||
|
|
||||||
|
|
||||||
|
class STT:
    """Microphone speech-to-text service built on sounddevice and Vosk."""

    samplerate = None
    args = ""
    remaining = ""

    def __init__(self):
        """Query input device 0, load the Vosk model and set up the audio queue."""
        device_info = sd.query_devices(0, "input")
        self.samplerate = int(device_info["default_samplerate"])
        self.model = Model(
            r"C:\repos\LoquendoBotV2\speech_to_text_models\vosk-model-small-es-0.42"
        )
        self.dump_fn = None

        # Audio blocks are handed from the capture callback to the recognizer
        # loop through this queue.
        self.q = gevent.queue.Queue()
        self.rec = None
        self.is_running = False

    def callback(self, indata, frames, time, status):
        """Receive one captured audio block and queue it for recognition.

        Args:
            indata: Buffer holding the captured audio block.
            frames: Unused here.
            time: Unused here.
            status: Stream status flags; printed to stderr when truthy.
        """
        if status:
            print(status, file=sys.stderr)
        self.q.put(bytes(indata))

    def start_recognition(self):
        """Capture audio and yield recognition results as server-sent events.

        This is a generator: nothing happens until it is iterated. While it
        runs, every queued audio block produces one ``data: ...`` event holding
        either a final or a partial recognizer result. The loop ends, and the
        input stream is closed, once ``stop_recognition()`` has been called.

        Yields:
            str: One server-sent-event frame per processed audio block.
        """
        self.is_running = True

        try:
            with sd.RawInputStream(
                samplerate=self.samplerate,
                blocksize=8000,
                device=0,  # device index 0; treated as the default microphone
                dtype="int16",
                channels=1,
                callback=self.callback,
            ):
                self.rec = KaldiRecognizer(self.model, self.samplerate)
                # Re-check the flag on every block so a stop request takes
                # effect as soon as the next audio block has been handled.
                while self.is_running:
                    data = self.q.get()
                    if self.rec.AcceptWaveform(data):
                        raw_result = self.rec.Result()
                    else:
                        raw_result = self.rec.PartialResult()
                    result_json = json.loads(str(raw_result))
                    # NOTE(review): this emits the Python dict repr (single
                    # quotes), not JSON text; confirm the client expects that
                    # before changing it.
                    yield f"data: {result_json}\n\n"
        finally:
            # Also reached when the consumer closes the generator early.
            self.is_running = False

    def stop_recognition(self):
        """Ask a running ``start_recognition`` loop to finish."""
        self.is_running = False
|
||||||
|
|
||||||
|
|
||||||
|
# Shared speech-to-text service, created at import time and used by the
# /stream and /stop endpoints.
speech_recognition_service = STT()
|
||||||
|
|
||||||
|
|
||||||
|
class TTS:
    """Text-to-speech service backed by a pyttsx3 engine."""

    engine = None
    rate = None

    def __init__(self):
        """Create the pyttsx3 engine."""
        self.engine = pyttsx3.init()

    def say(self, message, voice, count):
        """Render ``message`` to ``sounds/tts/Internal_<count>.mp3``.

        The output folder is resolved relative to the module-level
        ``settingsPath``: next to the settings folder's parent when
        ``environment`` is ``"dev"``, otherwise next to the settings file.

        Args:
            message: Text to synthesize.
            voice: Name of the voice to use. When no installed voice has this
                name, the engine's currently selected voice is kept.
            count: Value embedded in the output file name.
        """
        installed_voices = self.engine.getProperty("voices")
        matching_id = next(
            (item.id for item in installed_voices if item.name == voice), None
        )
        # Only switch voices on a match; an unknown name must not abort the
        # request with an unbound variable.
        if matching_id is not None:
            self.engine.setProperty("voice", matching_id)

        base_folder = os.path.dirname(settingsPath)
        if environment == "dev":
            # In dev the sounds folder sits one level above the settings folder.
            base_folder = os.path.dirname(base_folder)
        saveLocation = os.path.join(
            base_folder, "sounds", "tts", f"Internal_{count}.mp3"
        )

        self.engine.save_to_file(message, saveLocation)
        self.engine.runAndWait()

    def voices(self):
        """Return the names of the voices the engine reports.

        Returns:
            list: One name per voice.
        """
        voices = self.engine.getProperty("voices")
        self.engine.say(
            ""
        )  # engine breaks if you do not say something after getting voices
        self.engine.runAndWait()

        return [voice.name for voice in voices]
|
||||||
|
|
||||||
|
|
||||||
|
# Shared text-to-speech service, created at import time and used by the
# /audio and /voices endpoints.
text_to_speech_service = TTS()
|
||||||
|
|
||||||
|
# endpoints
|
||||||
|
|
||||||
|
|
||||||
|
@app.route("/stream", methods=["GET"])
def stream_recognition():
    """Serve the speech recognizer's output as a ``text/event-stream`` response."""
    # start_recognition() is a generator; the response iterates it lazily.
    event_source = speech_recognition_service.start_recognition()
    return Response(event_source, content_type="text/event-stream")
|
||||||
|
|
||||||
|
|
||||||
|
@app.route("/stop", methods=["POST"])
def stop_recording():
    """Ask the speech recognition service to stop and confirm with HTTP 200."""
    speech_recognition_service.stop_recognition()
    confirmation = Response("Speech recognition stopped", status=200)
    return confirmation
|
||||||
|
|
||||||
|
|
||||||
|
# @app.before_request
|
||||||
|
# def custom_warning():
|
||||||
|
# if environment == "dev":
|
||||||
|
# print(
|
||||||
|
# # "Running in internal development environment. This server is not for production use."
|
||||||
|
# )
|
||||||
|
|
||||||
|
|
||||||
|
@app.route("/terminate", methods=["GET"])
def terminate_processes():
    """Shut the server down and terminate the backend process.

    The process exits from inside the handler, so no HTTP response is sent.
    """
    try:
        shutdown_server()
    except RuntimeError:
        # No server shutdown hook is available for this request. Terminating
        # the process is the purpose of this endpoint, so carry on and exit.
        pass
    os._exit(0)
|
||||||
|
|
||||||
|
|
||||||
|
def shutdown_server():
    """Call the Werkzeug development server's shutdown hook for this request.

    Must be called while handling a request, because the hook is looked up in
    the request's WSGI environ.

    Raises:
        RuntimeError: If the environ carries no shutdown hook, for example
            when the app is not served by the Werkzeug development server or
            the installed Werkzeug release no longer provides the hook.
    """
    func = request.environ.get("werkzeug.server.shutdown")
    if func is None:
        raise RuntimeError("Server is not running")
    func()
|
||||||
|
|
||||||
|
|
||||||
|
# @app.route("/detect", methods=["POST"])
|
||||||
|
# def server_status():
|
||||||
|
# try:
|
||||||
|
# request_data = request.json
|
||||||
|
# message = request_data.get("message", "")
|
||||||
|
# confidence_values = detector.compute_language_confidence_values(message)
|
||||||
|
# for language, value in confidence_values:
|
||||||
|
# print(f"{language.name}: {value:.2f}")
|
||||||
|
# message = request_data.get("message", "")
|
||||||
|
# except Exception as e:
|
||||||
|
# return jsonify({"error": "An error occurred"}), 500
|
||||||
|
# return jsonify({"message": "Audio triggered"}), 200
|
||||||
|
|
||||||
|
|
||||||
|
@app.route("/status", methods=["GET"])
def server_status():
    """Report that the server is up."""
    payload = {"status": "server is running"}
    return jsonify(payload)
|
||||||
|
|
||||||
|
|
||||||
|
@app.route("/audio", methods=["POST"])
def trigger_backend_event():
    """Synthesize the posted message with the text-to-speech service.

    Reads ``message``, ``voice`` and ``count`` from the JSON request body and
    passes them to ``text_to_speech_service.say``.

    Returns:
        A JSON confirmation with HTTP 200, or a generic JSON error with
        HTTP 500 when anything goes wrong.
    """
    try:
        request_data = request.json
        message = request_data.get("message", "")
        voice = request_data.get("voice")
        count = request_data.get("count")
        text_to_speech_service.say(message, voice, count)
    except Exception:
        # Top-level request boundary: keep the generic reply for the client,
        # but record the traceback so the failure can be diagnosed.
        app.logger.exception("Text-to-speech request failed")
        return jsonify({"error": "An error occurred"}), 500
    return jsonify({"message": "Audio triggered"}), 200
|
||||||
|
|
||||||
|
|
||||||
|
@app.route("/voices", methods=["GET"])
def get_voices():
    """Return the voice names reported by the text-to-speech service.

    Returns:
        ``{"voices": [...]}`` with HTTP 200, or a generic JSON error with
        HTTP 500 when the voices cannot be listed.
    """
    try:
        voices = text_to_speech_service.voices()
    except Exception:
        # Top-level request boundary: keep the generic reply for the client,
        # but record the traceback so the failure can be diagnosed.
        app.logger.exception("Listing text-to-speech voices failed")
        return jsonify({"error": "An error occurred"}), 500
    return jsonify({"voices": voices}), 200
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Start-up checks: detect the language of a sample word and translate a
    # sample sentence, printing both results.
    LANGUAGE = LanguageDetection()
    lang = LANGUAGE.predict_lang("hola")
    print(lang)
    text = "Keep it up. You are awesome"
    translated = MyMemoryTranslator(
        source="english", target="spanish latin america"
    ).translate(text)
    print(translated)

    # Command line: <settings path> <environment>. With no arguments the
    # server runs with development defaults.
    if len(sys.argv) > 2:
        settingsPath = os.path.normpath(sys.argv[1])
        environment = sys.argv[2]
        settings.read(settingsPath)
        port = int(settings["GENERAL"]["PORT"])
    elif len(sys.argv) == 2:
        # Both values are needed together; a lone settings path used to crash
        # with an IndexError when the environment argument was read.
        sys.exit(
            f"usage: {os.path.basename(sys.argv[0])} <settings path> <environment>"
        )
    else:
        environment = "dev"
        port = 9000

    app.run(host="127.0.0.1", port=port)
|
||||||
Loading…
Reference in a new issue