fabianbrin
/
model-ml


			
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354
							from pathlib import Path
from typing import Dict
import os
import tempfile

from .stt import transcribe
from .detect_lang import detect_lang
from .translate import translate_text
from .tts import synthesize


# Directory handed to synthesize() for the generated speech. The resulting
# file's name is exposed to clients under the "/audio/" URL prefix
# (presumably served from this directory -- TODO confirm against the web layer).
AUDIO_DIR = Path("/work/models/audio")


def map_to_nllb(code: str) -> str:
    """Map a detected language code to an NLLB language code.

    Any code starting with ``fr`` (e.g. ``"fr"``, ``"fr-FR"``, ``"fra_Latn"``)
    maps to ``"fra_Latn"``. Every other code maps to the value of the
    ``TARGET_LANG_CODE`` environment variable, which defaults to
    ``"lin_Latn"``.

    The match ignores case and surrounding whitespace. A missing code
    (``None`` or ``""``) is treated as non-French instead of raising.

    Args:
        code: Language code produced by language detection.

    Returns:
        The NLLB code to use for that language.
    """
    # Normalise first so "FR", " fr" or None do not crash or get misrouted.
    normalized = (code or "").strip().lower()
    if normalized.startswith("fr"):
        return "fra_Latn"
    return os.getenv("TARGET_LANG_CODE", "lin_Latn")


def process_audio_file(file_bytes: bytes, filename: str) -> Dict:
    """Transcribe, translate and re-synthesize an uploaded audio file.

    The bytes are written to a temporary file, transcribed, and the language
    of the transcript is detected. French input is translated to the language
    named by the ``TARGET_LANG_CODE`` environment variable (default
    ``"lin_Latn"``); any other input is translated to French. The translated
    text is then synthesized to speech in ``AUDIO_DIR``.

    Args:
        file_bytes: Raw content of the audio file.
        filename: Original file name. Only its extension is used, as the
            suffix of the temporary file; ``".wav"`` is used when it has none.

    Returns:
        A dict with the keys ``"source_text"`` (transcript),
        ``"detected_lang"`` (the NLLB code used as translation source),
        ``"translated_text"`` and ``"audio_url"`` (``/audio/<file name>``).
    """
    # Temporary copy of the upload. mkstemp returns an already-open OS-level
    # descriptor together with the path; both must be released by us.
    suffix = os.path.splitext(filename or "")[1] or ".wav"
    fd, tmp_name = tempfile.mkstemp(suffix=suffix)
    tmp_path = Path(tmp_name)
    try:
        # fdopen takes ownership of the descriptor, so leaving the `with`
        # block closes it instead of leaking one descriptor per call.
        with os.fdopen(fd, "wb") as f:
            f.write(file_bytes)

        # Speech-to-text
        text, whisper_lang = transcribe(tmp_path)

        # Language detection
        code = detect_lang(text, whisper_lang)

        # Translation direction: French <-> configured target language
        src_nllb = map_to_nllb(code)
        if src_nllb != "fra_Latn":
            tgt_nllb = "fra_Latn"
        else:
            tgt_nllb = os.getenv("TARGET_LANG_CODE", "lin_Latn")

        # Translation
        translated = translate_text(text, src_nllb, tgt_nllb)

        # Text-to-speech; 'xx' is the generic (non-French) language code
        tts_lang = "fr" if tgt_nllb == "fra_Latn" else "xx"
        out_path = synthesize(translated, tts_lang, AUDIO_DIR)

        return {
            "source_text": text,
            "detected_lang": src_nllb,
            "translated_text": translated,
            "audio_url": f"/audio/{out_path.name}",
        }
    finally:
        # Best-effort cleanup: a failure to remove the temporary upload must
        # not mask the pipeline's result or its original exception.
        try:
            tmp_path.unlink()
        except OSError:
            pass