Add audio generation config, refactor rep_announcer

- AudioConfig now includes rep_max_count and rep_audio_dir
- app/audio/generate.py uses config instead of hardcoded constants
- RepAnnouncer rewritten to use a pre-generated audio cache
- Supports Windows winsound, macOS afplay, Linux paplay/aplay
- Pin requirements back to mediapipe==0.10.21 with numpy<2
2026-06-10 11:42:40 +08:00
parent 1f6c3f3de8
commit b45a8e2e85
5 changed files with 337 additions and 56 deletions
@@ -0,0 +1,124 @@
+# app/audio/generate.py
+from __future__ import annotations
+
+import platform
+import shutil
+import subprocess
+from pathlib import Path
+
+from loguru import logger
+
+
+def generate_rep_audio_files(
+    *,
+    max_count: int,
+    rate: int,
+    output_dir: Path,
+    overwrite: bool = False,
+) -> None:
+    """
+    Make sure a spoken-number wav file exists for every rep count 0..max_count.
+
+    Files are written as ``<output_dir>/<count>.wav``. Counts whose file is
+    already present are skipped unless ``overwrite`` is true, in which case
+    every count is regenerated. macOS goes through the ``say`` command; every
+    other platform goes through pyttsx3.
+    """
+    output_dir.mkdir(parents=True, exist_ok=True)
+
+    pending: list[int] = []
+    for number in range(max_count + 1):
+        if overwrite or not _audio_path(output_dir, number).exists():
+            pending.append(number)
+
+    if len(pending) == 0:
+        logger.info("Rep audio files already prepared: {}", output_dir)
+        return
+
+    os_name = platform.system().lower()
+    logger.info(
+        "Preparing rep audio files, system={}, count={}, output_dir={}",
+        os_name,
+        len(pending),
+        output_dir,
+    )
+
+    # Pick the generator once, then call it with the same keyword arguments.
+    backend = _generate_with_macos_say if os_name == "darwin" else _generate_with_pyttsx3
+    backend(counts=pending, output_dir=output_dir, rate=rate)
+
+    logger.info("Rep audio files prepared: {}", output_dir)
+
+
+def _generate_with_macos_say(
+    *,
+    counts: list[int],
+    output_dir: Path,
+    rate: int,
+) -> None:
+    """
+    Render each count to ``<output_dir>/<count>.wav`` with the macOS ``say`` command.
+
+    Args:
+        counts: Numbers to speak; one file is produced per entry.
+        output_dir: Directory that receives the wav files.
+        rate: Speech rate passed to ``say -r`` (words per minute).
+
+    Raises:
+        RuntimeError: If not running on macOS or ``say`` is not on PATH.
+        subprocess.CalledProcessError: If ``say`` exits with a non-zero status.
+    """
+    if platform.system().lower() != "darwin":
+        raise RuntimeError("say command is only available on macOS")
+
+    if shutil.which("say") is None:
+        raise RuntimeError("macOS say command not found")
+
+    for count in counts:
+        audio_file = _audio_path(output_dir, count)
+
+        subprocess.run(
+            [
+                "say",
+                "-r",
+                str(rate),
+                "--file-format=WAVE",
+                # WAVE needs little-endian PCM; without an explicit data
+                # format `say` can reject the output file ("fmt?").
+                "--data-format=LEI16@22050",
+                "-o",
+                str(audio_file),
+                str(count),
+            ],
+            stdout=subprocess.DEVNULL,
+            stderr=subprocess.DEVNULL,
+            check=True,
+        )
+
+
+def _generate_with_pyttsx3(
+    *,
+    counts: list[int],
+    output_dir: Path,
+    rate: int,
+) -> None:
+    """
+    Render each count to ``<output_dir>/<count>.wav`` through pyttsx3.
+
+    All files are queued on one engine and written by a single
+    ``runAndWait()`` call. Raises RuntimeError when pyttsx3 cannot be imported.
+    """
+    try:
+        import pyttsx3
+    except Exception as exc:
+        raise RuntimeError(f"pyttsx3 unavailable: {exc}") from exc
+
+    tts = pyttsx3.init()
+    for prop, value in (("rate", rate), ("volume", 1.0)):
+        tts.setProperty(prop, value)
+
+    for number in counts:
+        tts.save_to_file(str(number), str(_audio_path(output_dir, number)))
+
+    tts.runAndWait()
+
+
+def _audio_path(output_dir: Path, count: int) -> Path:
+    """Return the wav file path for ``count`` inside ``output_dir``."""
+    return output_dir.joinpath(str(count) + ".wav")
@@ -1,90 +1,236 @@
 from __future__ import annotations

+import os
 import queue
+import shutil
 import subprocess
 import sys
 import threading
+from pathlib import Path
 from typing import Any

 from loguru import logger

-class RepAnnouncer:
-    """运动次数语音播报器"""

-    def __init__(self, *, enabled: bool = True, rate: int = 185, volume: float = 1.0) -> None:
-        """初始化TTS引擎（macOS用say，其他系统用pyttsx3）"""
+class RepAnnouncer:
+    """运动次数语音播报器：预生成 0~200 音频文件，运行时直接播放"""
+
+    def __init__(
+        self,
+        *,
+        enabled: bool = True,
+        rate: int = 185,
+        volume: float = 1.0,
+        max_count: int = 200,
+        cache_dir: str | Path = "runtime/tts_cache/reps",
+    ) -> None:
+        """
+        Store the playback settings and, when enabled, start the announcer.
+
+        Args:
+            enabled: When false, nothing is prepared or started.
+            rate: Speech rate used when generating the cached audio.
+            volume: Volume handed to pyttsx3 when generating the cached audio.
+            max_count: Highest count that can be announced.
+            cache_dir: Directory that holds the pre-generated audio files.
+        """
        self.enabled = enabled
        self.rate = rate
        self.volume = volume
-        self._queue: queue.Queue[str | None] = queue.Queue()
+        self.max_count = max_count
+        self.cache_dir = Path(cache_dir)
+
+        # Holds counts to play; None is the shutdown sentinel for the worker.
+        self._queue: queue.Queue[int | None] = queue.Queue()
        self._thread: threading.Thread | None = None
        self._engine: Any | None = None
-        self._use_macos_say = False
        self._current_process: subprocess.Popen | None = None
+        self._closed = False
+
+        # Platform flags select the cached file format and the playback backend.
+        self._use_macos_say = sys.platform == "darwin"
+        self._use_windows_winsound = sys.platform.startswith("win")

        if self.enabled:
            self._start()

    def announce_count(self, count: int) -> None:
-        """将次数放入队列进行异步语音播报"""
-        if not self.enabled or count <= 0:
+        """Queue ``count`` so the background thread plays its audio file."""
+        if not self.enabled or self._closed:
            return
-        while True:
-            try:
-                self._queue.get_nowait()
-            except queue.Empty:
-                break
-        self._queue.put(str(count))
+
+        # Only counts in 1..max_count are announced.
+        if count <= 0 or count > self.max_count:
+            return
+
+        # Keep the "latest count wins" policy so announcements do not pile up.
+        self._clear_pending_counts()
+        self._queue.put(count)

    def close(self) -> None:
        """停止播报线程并释放资源"""
-        if not self.enabled:
+        if not self.enabled or self._closed:
            return
+
+        # Mark closed first so later announce_count()/close() calls are no-ops.
+        self._closed = True
+        # None is the sentinel that makes the worker thread return.
        self._queue.put(None)
+
        if self._thread is not None:
            self._thread.join(timeout=1.0)
+
+        # Cut off any sound that is still playing.
+        self._stop_current_playback()
+
+        logger.info("Rep announcer closed")
+
+    def _start(self) -> None:
+        """Prepare the audio cache, then launch the daemon playback thread.
+
+        If preparing the cache fails, the announcer disables itself and no
+        thread is started.
+        """
+        self.cache_dir.mkdir(parents=True, exist_ok=True)
+
+        try:
+            self._prepare_audio_cache()
+        except Exception as exc:
+            self.enabled = False
+            logger.warning("Rep announcer disabled, failed to prepare audio cache: {}", exc)
+            return
+
+        worker = threading.Thread(target=self._run, name="RepAnnouncer", daemon=True)
+        self._thread = worker
+        worker.start()
+
+        logger.info(
+            "Rep announcer initialized with audio cache, platform={}, max_count={}, cache_dir={}",
+            sys.platform,
+            self.max_count,
+            self.cache_dir,
+        )
+
+    def _prepare_audio_cache(self) -> None:
+        """Fill the cache with the platform's generator: ``say`` on macOS, pyttsx3 elsewhere."""
+        prepare = (
+            self._prepare_macos_say_cache
+            if self._use_macos_say
+            else self._prepare_pyttsx3_cache
+        )
+        prepare()
+
+    def _prepare_macos_say_cache(self) -> None:
+        """macOS: render every missing count 0..max_count to an aiff file with ``say``."""
+        if shutil.which("say") is None:
+            raise RuntimeError("macOS say command not found")
+
+        for number in range(self.max_count + 1):
+            target = self._audio_path(number)
+            if not target.exists():
+                command = ["say", "-r", str(self.rate), "-o", str(target), str(number)]
+                subprocess.run(
+                    command,
+                    stdout=subprocess.DEVNULL,
+                    stderr=subprocess.DEVNULL,
+                    check=True,
+                )
+
+        logger.info("macOS say audio cache prepared")
+
+    def _prepare_pyttsx3_cache(self) -> None:
+        """
+        Non-macOS: render every missing count 0..max_count to a wav file with pyttsx3.
+
+        The TTS engine is only imported and initialised when at least one file
+        is missing, so a complete cache keeps working even if pyttsx3 cannot
+        start on this machine.
+
+        Raises:
+            RuntimeError: If files are missing and pyttsx3 cannot be imported.
+        """
+        missing = [
+            count
+            for count in range(0, self.max_count + 1)
+            if not self._audio_path(count).exists()
+        ]
+
+        if missing:
+            try:
+                import pyttsx3
+            except Exception as exc:
+                raise RuntimeError(f"pyttsx3 unavailable: {exc}") from exc
+
+            self._engine = pyttsx3.init()
+            self._engine.setProperty("rate", self.rate)
+            self._engine.setProperty("volume", self.volume)
+
+            for count in missing:
+                self._engine.save_to_file(str(count), str(self._audio_path(count)))
+
+            # One runAndWait() call processes every queued save_to_file() request.
+            self._engine.runAndWait()
+
+        logger.info("pyttsx3 audio cache prepared")
+
+    def _run(self) -> None:
+        """Worker loop: play the cached file for each queued count until None arrives."""
+        # iter(callable, sentinel) keeps calling get() and stops on the None sentinel.
+        for number in iter(self._queue.get, None):
+            try:
+                clip = self._audio_path(number)
+                if clip.exists():
+                    self._play_audio(clip)
+                else:
+                    logger.warning("Rep audio file missing: {}", clip)
+            except Exception as exc:
+                logger.warning("Failed to play rep count {}: {}", number, exc)
+
+    def _play_audio(self, audio_file: Path) -> None:
+        """Stop whatever is playing, then start ``audio_file`` on the platform's player."""
+        self._stop_current_playback()
+        clip = str(audio_file)
+
+        if self._use_macos_say:
+            command = ["afplay", clip]
+        elif self._use_windows_winsound:
+            import winsound
+
+            # SND_ASYNC returns immediately; _stop_current_playback purges the sound later.
+            winsound.PlaySound(clip, winsound.SND_FILENAME | winsound.SND_ASYNC)
+            return
+        else:
+            # Linux: prefer paplay, fall back to aplay.
+            player = shutil.which("paplay") or shutil.which("aplay")
+            if player is None:
+                logger.warning("No audio player found, expected paplay or aplay")
+                return
+            command = [player, clip]
+
+        self._current_process = subprocess.Popen(
+            command,
+            stdout=subprocess.DEVNULL,
+            stderr=subprocess.DEVNULL,
+        )
+
+    def _stop_current_playback(self) -> None:
+        """Interrupt the sound that is currently playing, if any."""
+        if self._use_windows_winsound:
+            try:
+                import winsound
+
+                winsound.PlaySound(None, winsound.SND_PURGE)
+            except Exception:
+                # Best effort: errors from winsound are ignored.
+                pass
+            return
+
+        # Only terminate a player process that is still running.
        if self._current_process is not None and self._current_process.poll() is None:
            self._current_process.terminate()

-    def _start(self) -> None:
-        """根据平台初始化TTS引擎并启动后台播报线程"""
-        if sys.platform == "darwin":
-            self._use_macos_say = True
-            logger.info("Rep announcer initialized with macOS say")
-        else:
-            try:
-                import pyttsx3
+        self._current_process = None

-                self._engine = pyttsx3.init()
-                self._engine.setProperty("rate", self.rate)
-                self._engine.setProperty("volume", self.volume)
-                logger.info("Rep announcer initialized with pyttsx3")
-            except Exception as exc:
-                self.enabled = False
-                logger.warning("Rep announcer disabled, pyttsx3 unavailable: {}", exc)
-                return
+    def _audio_path(self, count: int) -> Path:
+        """Return the cached audio file for ``count``: aiff on macOS, wav elsewhere."""
+        if self._use_macos_say:
+            extension = ".aiff"
+        else:
+            extension = ".wav"
+        return self.cache_dir / (str(count) + extension)

-        self._thread = threading.Thread(target=self._run, name="RepAnnouncer", daemon=True)
-        self._thread.start()
-
-    def _run(self) -> None:
-        """后台线程：从队列读取文本并调用TTS播放"""
+    def _clear_pending_counts(self) -> None:
+        """Drop every count still waiting in the queue so announcements do not pile up."""
        while True:
-            text = self._queue.get()
-            if text is None:
-                return
-
            try:
-                if self._use_macos_say:
-                    if self._current_process is not None and self._current_process.poll() is None:
-                        self._current_process.terminate()
-                    self._current_process = subprocess.Popen(
-                        ["say", "-r", str(self.rate), text],
-                        stdout=subprocess.DEVNULL,
-                        stderr=subprocess.DEVNULL,
-                    )
-                elif self._engine is not None:
-                    self._engine.say(text)
-                    self._engine.runAndWait()
-            except Exception as exc:
-                logger.warning("Failed to announce rep count {}: {}", text, exc)
+                item = self._queue.get_nowait()
+                if item is None:
+                    # Do not swallow the close signal: put it back and stop draining.
+                    self._queue.put(None)
+                    return
+            except queue.Empty:
+                # Queue drained.
+                return
@@ -22,6 +22,8 @@ audio:
  rep_announcer_enabled: true
  rep_announcer_rate: 185
  rep_announcer_volume: 1.0
+  rep_max_count: 200              # highest rep count with a pre-generated audio file
+  rep_audio_dir: ""               # empty = fall back to app/audio/reps

 logging:
  dir: logs
@@ -46,6 +46,15 @@ class AudioConfig:
    rep_announcer_enabled: bool = True
    rep_announcer_rate: int = 185
    rep_announcer_volume: float = 1.0
+    rep_max_count: int = 200
+    rep_audio_dir: str = ""
+
+    @property
+    def resolved_audio_dir(self) -> Path:
+        """
+        Return the absolute path of the rep audio directory.
+
+        A non-empty ``rep_audio_dir`` is used, with ``~`` expanded and a
+        relative path anchored at the current working directory. Otherwise the
+        default ``app/audio/reps`` directory, located relative to this file,
+        is returned.
+        """
+        if self.rep_audio_dir:
+            # resolve() turns a relative setting into the absolute path promised above.
+            return Path(self.rep_audio_dir).expanduser().resolve()
+        return Path(__file__).resolve().parent.parent / "app" / "audio" / "reps"


@dataclass
@@ -1,8 +1,8 @@
 aiortc>=1.9.0
 websockets>=13.0
-opencv-contrib-python>=4.13.0.92
-numpy>=2.4.6
+opencv-contrib-python>=4.10.0
+numpy>=1.26,<2
 loguru>=0.7.0
-mediapipe==0.10.35
+mediapipe==0.10.21
 pyttsx3>=2.99
 pyyaml>=6.0