def generate_rep_audio_files(
    *,
    max_count: int,
    rate: int,
    output_dir: Path,
    overwrite: bool = False,
) -> None:
    """Ensure a spoken-number wav file exists for every count in 0..max_count.

    Files are written as ``<output_dir>/<count>.wav`` (``0.wav``, ``1.wav``,
    ...). Only missing files are generated unless ``overwrite`` is true.
    Intended to be called once when the service starts.

    Args:
        max_count: Highest count to generate (inclusive). A negative value
            yields no work.
        rate: Speech rate passed to the TTS backend.
        output_dir: Directory that receives the wav files; created if absent.
        overwrite: Regenerate every file even if it already exists.

    Raises:
        RuntimeError: The platform TTS backend is unavailable.
        subprocess.CalledProcessError: The macOS ``say`` command failed.
    """
    # Accept plain strings as well, e.g. a value taken straight from config.
    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    missing_counts = [
        count
        for count in range(max_count + 1)
        if overwrite or not _audio_path(output_dir, count).exists()
    ]

    if not missing_counts:
        logger.info("Rep audio files already prepared: {}", output_dir)
        return

    system = platform.system().lower()

    logger.info(
        "Preparing rep audio files, system={}, count={}, output_dir={}",
        system,
        len(missing_counts),
        output_dir,
    )

    # macOS ships its own synthesizer; every other platform goes through pyttsx3.
    generate = _generate_with_macos_say if system == "darwin" else _generate_with_pyttsx3
    generate(counts=missing_counts, output_dir=output_dir, rate=rate)

    logger.info("Rep audio files prepared: {}", output_dir)


def _generate_with_macos_say(
    *,
    counts: list[int],
    output_dir: Path,
    rate: int,
) -> None:
    """Generate one wav file per count with the macOS ``say`` command.

    Raises:
        RuntimeError: Not running on macOS, or ``say`` is not on PATH.
        subprocess.CalledProcessError: ``say`` exited with a non-zero status.
    """
    if platform.system().lower() != "darwin":
        raise RuntimeError("say command is only available on macOS")

    if shutil.which("say") is None:
        raise RuntimeError("macOS say command not found")

    for count in counts:
        audio_file = _audio_path(output_dir, count)
        command = [
            "say",
            "-r",
            str(rate),
            "--file-format=WAVE",
            # WAVE needs an explicit little-endian PCM data format; the default
            # (big-endian) data format cannot be stored in a WAVE container.
            "--data-format=LEI16@22050",
            "-o",
            str(audio_file),
            str(count),
        ]
        try:
            subprocess.run(
                command,
                stdout=subprocess.DEVNULL,
                stderr=subprocess.PIPE,  # keep the reason available on failure
                text=True,
                check=True,
            )
        except BaseException as exc:
            # Never leave a truncated file behind: the next run would see it as
            # "already generated" and skip it.
            audio_file.unlink(missing_ok=True)
            if isinstance(exc, subprocess.CalledProcessError):
                logger.warning("say failed for count {}: {}", count, exc.stderr)
            raise


def _generate_with_pyttsx3(
    *,
    counts: list[int],
    output_dir: Path,
    rate: int,
) -> None:
    """Generate one wav file per count with pyttsx3 (Windows / Linux).

    Raises:
        RuntimeError: pyttsx3 cannot be imported.
    """
    if not counts:
        # Nothing to synthesize; avoid starting a TTS engine for no work.
        return

    try:
        import pyttsx3
    except Exception as exc:
        raise RuntimeError(f"pyttsx3 unavailable: {exc}") from exc

    engine = pyttsx3.init()
    engine.setProperty("rate", rate)
    engine.setProperty("volume", 1.0)

    # Queue every utterance first; runAndWait() then renders them all.
    for count in counts:
        engine.save_to_file(str(count), str(_audio_path(output_dir, count)))

    engine.runAndWait()


def _audio_path(output_dir: Path, count: int) -> Path:
    """Return the wav path used for ``count`` inside ``output_dir``."""
    return output_dir / f"{count}.wav"
class RepAnnouncer:
    """Announce exercise rep counts by playing pre-generated audio files.

    When enabled, construction makes sure one audio file per count in
    ``0..max_count`` exists in ``cache_dir`` (``say`` -> aiff on macOS,
    pyttsx3 -> wav elsewhere) and starts a daemon thread that plays the file
    for the most recently requested count. If the cache cannot be prepared the
    announcer disables itself instead of raising.
    """

    # Seconds a terminated player process gets to exit before it is killed.
    _PLAYER_EXIT_TIMEOUT = 1.0

    def __init__(
        self,
        *,
        enabled: bool = True,
        rate: int = 185,
        volume: float = 1.0,
        max_count: int = 200,
        cache_dir: str | Path = "runtime/tts_cache/reps",
    ) -> None:
        """Store the settings and, if enabled, prepare the cache and worker.

        Args:
            enabled: When false the announcer is inert and touches no files.
            rate: Speech rate used when generating the audio files.
            volume: Volume given to pyttsx3 when generating files. It is not
                applied on macOS, where ``say`` is used for generation.
            max_count: Highest count that can be announced (inclusive).
            cache_dir: Directory holding the generated audio files.
        """
        self.enabled = enabled
        self.rate = rate
        self.volume = volume
        self.max_count = max_count
        self.cache_dir = Path(cache_dir)

        # Holds at most the latest pending count; None is the shutdown signal.
        self._queue: queue.Queue[int | None] = queue.Queue()
        self._thread: threading.Thread | None = None
        self._engine: Any | None = None
        self._current_process: subprocess.Popen | None = None
        self._closed = False

        self._use_macos_say = sys.platform == "darwin"
        self._use_windows_winsound = sys.platform.startswith("win")

        if self.enabled:
            self._start()

    def announce_count(self, count: int) -> None:
        """Queue ``count`` for playback, replacing any count still pending.

        Counts outside ``1..max_count`` are ignored, as are calls made while
        the announcer is disabled or after ``close()``.
        """
        if not self.enabled or self._closed:
            return

        if count <= 0 or count > self.max_count:
            return

        # Only the newest count matters; drop stale ones so speech never lags.
        self._clear_pending_counts()
        self._queue.put(count)

    def close(self) -> None:
        """Stop the worker thread and any sound still playing. Idempotent."""
        if not self.enabled or self._closed:
            return

        self._closed = True
        self._queue.put(None)

        if self._thread is not None:
            self._thread.join(timeout=1.0)

        self._stop_current_playback()

        logger.info("Rep announcer closed")

    def _start(self) -> None:
        """Prepare the audio cache and start the playback thread.

        Any failure (including failing to create ``cache_dir``) disables the
        announcer rather than propagating out of the constructor.
        """
        try:
            self.cache_dir.mkdir(parents=True, exist_ok=True)
            self._prepare_audio_cache()
        except Exception as exc:
            self.enabled = False
            logger.warning("Rep announcer disabled, failed to prepare audio cache: {}", exc)
            return

        self._thread = threading.Thread(
            target=self._run,
            name="RepAnnouncer",
            daemon=True,
        )
        self._thread.start()

        logger.info(
            "Rep announcer initialized with audio cache, platform={}, max_count={}, cache_dir={}",
            sys.platform,
            self.max_count,
            self.cache_dir,
        )

    def _missing_counts(self) -> list[int]:
        """Return the counts in ``0..max_count`` that have no cached file yet."""
        return [
            count
            for count in range(self.max_count + 1)
            if not self._audio_path(count).exists()
        ]

    def _prepare_audio_cache(self) -> None:
        """Generate the missing audio files with the platform's TTS backend."""
        if self._use_macos_say:
            self._prepare_macos_say_cache()
        else:
            self._prepare_pyttsx3_cache()

    def _prepare_macos_say_cache(self) -> None:
        """macOS: generate missing aiff files with the ``say`` command.

        Raises:
            RuntimeError: Files are missing and ``say`` is not on PATH.
            subprocess.CalledProcessError: ``say`` exited with an error.
        """
        missing = self._missing_counts()

        # ``say`` is only required when something actually has to be generated.
        if missing and shutil.which("say") is None:
            raise RuntimeError("macOS say command not found")

        for count in missing:
            audio_file = self._audio_path(count)
            try:
                subprocess.run(
                    [
                        "say",
                        "-r",
                        str(self.rate),
                        "-o",
                        str(audio_file),
                        str(count),
                    ],
                    stdout=subprocess.DEVNULL,
                    stderr=subprocess.DEVNULL,
                    check=True,
                )
            except BaseException:
                # A partial file would be mistaken for a cached one next time.
                audio_file.unlink(missing_ok=True)
                raise

        logger.info("macOS say audio cache prepared")

    def _prepare_pyttsx3_cache(self) -> None:
        """Non-macOS: generate missing wav files with pyttsx3.

        Raises:
            RuntimeError: Files are missing and pyttsx3 cannot be imported.
        """
        missing = self._missing_counts()

        # With a complete cache pyttsx3 is not needed at all, so a machine
        # without a working TTS driver can still play the existing files.
        if missing:
            try:
                import pyttsx3
            except Exception as exc:
                raise RuntimeError(f"pyttsx3 unavailable: {exc}") from exc

            self._engine = pyttsx3.init()
            self._engine.setProperty("rate", self.rate)
            self._engine.setProperty("volume", self.volume)

            for count in missing:
                self._engine.save_to_file(str(count), str(self._audio_path(count)))

            self._engine.runAndWait()

        logger.info("pyttsx3 audio cache prepared")

    def _run(self) -> None:
        """Worker loop: play the cached file for each queued count until None."""
        while True:
            count = self._queue.get()

            if count is None:
                return

            try:
                audio_file = self._audio_path(count)

                if not audio_file.exists():
                    logger.warning("Rep audio file missing: {}", audio_file)
                    continue

                self._play_audio(audio_file)

            except Exception as exc:
                logger.warning("Failed to play rep count {}: {}", count, exc)

    def _play_audio(self, audio_file: Path) -> None:
        """Start asynchronous playback of ``audio_file``, cutting off any previous sound."""
        self._stop_current_playback()

        if self._use_macos_say:
            self._current_process = subprocess.Popen(
                ["afplay", str(audio_file)],
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL,
            )
            return

        if self._use_windows_winsound:
            import winsound

            # SND_ASYNC returns immediately; _stop_current_playback interrupts it.
            winsound.PlaySound(str(audio_file), winsound.SND_FILENAME | winsound.SND_ASYNC)
            return

        # Linux: prefer paplay, fall back to aplay.
        player = shutil.which("paplay") or shutil.which("aplay")
        if player is None:
            logger.warning("No audio player found, expected paplay or aplay")
            return

        self._current_process = subprocess.Popen(
            [player, str(audio_file)],
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
        )

    def _stop_current_playback(self) -> None:
        """Interrupt the sound that is currently playing, if any."""
        if self._use_windows_winsound:
            try:
                import winsound

                winsound.PlaySound(None, winsound.SND_PURGE)
            except Exception as exc:
                # Best effort: failing to stop a sound must not break the caller.
                logger.debug("Failed to stop winsound playback: {}", exc)
            return

        process = self._current_process
        self._current_process = None

        if process is None or process.poll() is not None:
            return

        process.terminate()
        try:
            # Reap the child so it does not linger as a zombie process.
            process.wait(timeout=self._PLAYER_EXIT_TIMEOUT)
        except subprocess.TimeoutExpired:
            process.kill()
            process.wait()

    def _audio_path(self, count: int) -> Path:
        """Return the cache path for ``count`` (aiff on macOS, wav elsewhere)."""
        suffix = ".aiff" if self._use_macos_say else ".wav"
        return self.cache_dir / f"{count}{suffix}"

    def _clear_pending_counts(self) -> None:
        """Drop queued counts that have not been played yet.

        A pending shutdown signal (None) is put back so ``close()`` is not lost.
        """
        while True:
            try:
                item = self._queue.get_nowait()
            except queue.Empty:
                return

            if item is None:
                self._queue.put(None)
                return
@property
def resolved_audio_dir(self) -> Path:
    """Return the absolute path of the directory holding the rep audio files.

    A non-empty ``rep_audio_dir`` is used after expanding ``~`` and making it
    absolute (relative values are resolved against the current working
    directory). An empty value falls back to ``app/audio/reps`` two levels
    above this module's directory.
    """
    # NOTE(review): relies on `Path` being imported at module level; the
    # visible hunk does not add `from pathlib import Path` -- confirm.
    if self.rep_audio_dir:
        return Path(self.rep_audio_dir).expanduser().resolve()
    return Path(__file__).resolve().parent.parent / "app" / "audio" / "reps"