stt.py 1.2 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849
  1. from pathlib import Path
  2. from typing import Tuple
  3. import tempfile
  4. import subprocess
  5. import whisper
  # Module-level cache holding the Whisper model once it has been loaded.
  _WHISPER_MODEL = None


  def _get_model(name: str = "base"):
      """Return the cached Whisper model, loading it on first use.

      The model is loaded with ``whisper.load_model(name)`` the first time this
      function is called and stored in the module-level ``_WHISPER_MODEL``.

      Note that the cache is not keyed by ``name``: once a model has been
      loaded, every later call returns that same object, whatever ``name`` is
      passed.

      Args:
          name: Model name handed to ``whisper.load_model`` on the first call.

      Returns:
          The object returned by ``whisper.load_model``.
      """
      global _WHISPER_MODEL
      if _WHISPER_MODEL is not None:
          return _WHISPER_MODEL
      _WHISPER_MODEL = whisper.load_model(name)
      return _WHISPER_MODEL
  def ensure_wav(input_path: Path) -> Path:
      """Return a WAV version of ``input_path``, converting via ffmpeg if needed.

      A path whose suffix is already ``.wav`` (case-insensitive) is returned
      unchanged. Any other file is converted by ffmpeg (``-ar 16000 -ac 1``)
      into a newly created temporary ``.wav`` file; the caller is responsible
      for deleting that file once it is no longer needed.

      Args:
          input_path: Audio file to convert.

      Returns:
          ``input_path`` itself when it is already a WAV, otherwise the path of
          the temporary WAV file that was written.

      Raises:
          subprocess.CalledProcessError: ffmpeg exited with a non-zero status.
          OSError: the ffmpeg executable could not be started.
      """
      if input_path.suffix.lower() == ".wav":
          return input_path

      # Reserve a unique output name and close the handle immediately, so no
      # file descriptor stays open while ffmpeg overwrites the file.
      with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as handle:
          out = Path(handle.name)

      cmd = [
          "ffmpeg",
          "-y",  # global option: overwrite the placeholder file created above
          "-i",
          str(input_path),
          "-ar",
          "16000",
          "-ac",
          "1",
          str(out),
      ]
      try:
          subprocess.run(
              cmd, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE
          )
      except (subprocess.CalledProcessError, OSError):
          # Conversion failed: do not leave the temporary file behind.
          # Removal is best-effort; the original error is what the caller needs.
          try:
              out.unlink()
          except OSError:
              pass
          raise
      return out
  def transcribe(audio_path: Path) -> Tuple[str, str]:
      """Transcribe an audio file with Whisper.

      The file is first passed through ``ensure_wav``. If that produced a
      temporary WAV (i.e. a path different from ``audio_path``), the temporary
      file is deleted once transcription has finished or failed; the caller's
      own file is never removed.

      Args:
          audio_path: Audio file to transcribe.

      Returns:
          A ``(text, language)`` tuple: the stripped ``"text"`` entry and the
          ``"language"`` entry of the Whisper result (e.g. ``"fr"``). Either is
          an empty string when the entry is missing.
      """
      model = _get_model()
      wav_path = ensure_wav(audio_path)
      try:
          result = model.transcribe(str(wav_path))
      finally:
          # Only remove files created by the conversion step, never the input.
          if wav_path != audio_path:
              try:
                  wav_path.unlink()
              except OSError:
                  # Best-effort cleanup; do not mask a transcription error.
                  pass
      text = result.get("text", "").strip()
      lang = result.get("language", "")  # e.g., 'fr'
      return text, lang