Add audio generation config, refactor rep_announcer

- AudioConfig now includes rep_max_count and rep_audio_dir
- app/audio/generate.py uses config instead of hardcoded constants
- RepAnnouncer rewritten to use a pre-generated audio cache
- Supports Windows winsound, macOS afplay, Linux paplay/aplay
- Pin requirements back to mediapipe==0.10.21 with numpy<2
This commit is contained in:
2026-06-10 11:42:40 +08:00
parent 1f6c3f3de8
commit b45a8e2e85
5 changed files with 337 additions and 56 deletions
+124
View File
@@ -0,0 +1,124 @@
# app/audio/generate.py
from __future__ import annotations
import platform
import shutil
import subprocess
from pathlib import Path
from loguru import logger
def generate_rep_audio_files(
    *,
    max_count: int,
    rate: int,
    output_dir: Path,
    overwrite: bool = False,
) -> None:
    """Make sure a spoken-number wav file exists for every count 0..max_count.

    Files are named ``<count>.wav`` inside ``output_dir`` (for example
    ``app/audio/reps/0.wav`` ... ``app/audio/reps/200.wav``). Intended to be
    called once when the service starts.

    Args:
        max_count: Highest rep count to generate audio for (inclusive).
        rate: Speech rate handed to the TTS backend.
        output_dir: Directory receiving the wav files; created if missing.
        overwrite: When true, regenerate every file even if it already exists.
    """
    output_dir.mkdir(parents=True, exist_ok=True)

    every_count = range(0, max_count + 1)
    if overwrite:
        # Regenerate everything without touching the filesystem to check.
        pending = list(every_count)
    else:
        pending = [
            number
            for number in every_count
            if not _audio_path(output_dir, number).exists()
        ]

    if not pending:
        logger.info("Rep audio files already prepared: {}", output_dir)
        return

    os_name = platform.system().lower()
    logger.info(
        "Preparing rep audio files, system={}, count={}, output_dir={}",
        os_name,
        len(pending),
        output_dir,
    )

    # macOS uses the built-in `say` command; every other platform uses pyttsx3.
    synthesize = (
        _generate_with_macos_say if os_name == "darwin" else _generate_with_pyttsx3
    )
    synthesize(counts=pending, output_dir=output_dir, rate=rate)

    logger.info("Rep audio files prepared: {}", output_dir)
def _generate_with_macos_say(
    *,
    counts: list[int],
    output_dir: Path,
    rate: int,
) -> None:
    """Generate wav files on macOS with the built-in ``say`` command.

    Args:
        counts: Rep counts to synthesize; each is spoken as its decimal string.
        output_dir: Directory in which ``<count>.wav`` files are written.
        rate: Speech rate passed to ``say -r``.

    Raises:
        RuntimeError: If not running on macOS or ``say`` is not on PATH.
        subprocess.CalledProcessError: If ``say`` exits with a non-zero status.
    """
    if platform.system().lower() != "darwin":
        raise RuntimeError("say command is only available on macOS")
    if shutil.which("say") is None:
        raise RuntimeError("macOS say command not found")

    for count in counts:
        audio_file = _audio_path(output_dir, count)
        command = [
            "say",
            "-r",
            str(rate),
            "--file-format=WAVE",
            # A WAVE container needs an explicit little-endian PCM data
            # format; without it `say` can refuse to open the output file.
            "--data-format=LEI16@22050",
            "-o",
            str(audio_file),
            str(count),
        ]
        try:
            subprocess.run(
                command,
                stdout=subprocess.DEVNULL,
                # Capture stderr so a failure can be diagnosed from the log.
                stderr=subprocess.PIPE,
                check=True,
            )
        except subprocess.CalledProcessError as exc:
            detail = (exc.stderr or b"").decode("utf-8", errors="replace").strip()
            logger.error(
                "say failed for count {} (exit code {}): {}",
                count,
                exc.returncode,
                detail,
            )
            raise
def _generate_with_pyttsx3(
*,
counts: list[int],
output_dir: Path,
rate: int,
) -> None:
"""Generate wav files with pyttsx3 (used on Windows / Linux).

Each count in ``counts`` is converted to its decimal string and passed to
``engine.save_to_file`` together with the path returned by
``_audio_path(output_dir, count)``. The engine's speech rate is set to
``rate`` and its volume to 1.0.

Raises:
    RuntimeError: If importing pyttsx3 fails for any reason.
"""
# Imported inside the function rather than at module level; any failure is
# re-raised as RuntimeError with the original exception chained.
try:
import pyttsx3
except Exception as exc:
raise RuntimeError(f"pyttsx3 unavailable: {exc}") from exc
engine = pyttsx3.init()
engine.setProperty("rate", rate)
engine.setProperty("volume", 1.0)
for count in counts:
audio_file = _audio_path(output_dir, count)
engine.save_to_file(str(count), str(audio_file))
# NOTE(review): indentation is lost in this view, so it is unclear whether
# runAndWait() runs once after all files are queued or once per file — confirm.
engine.runAndWait()
def _audio_path(output_dir: Path, count: int) -> Path:
return output_dir / f"{count}.wav"
+185 -39
View File
@@ -1,90 +1,236 @@
from __future__ import annotations from __future__ import annotations
import os
import queue import queue
import shutil
import subprocess import subprocess
import sys import sys
import threading import threading
from pathlib import Path
from typing import Any from typing import Any
from loguru import logger from loguru import logger
class RepAnnouncer:
"""运动次数语音播报器"""
def __init__(self, *, enabled: bool = True, rate: int = 185, volume: float = 1.0) -> None: class RepAnnouncer:
"""初始化TTS引擎(macOS用say,其他系统用pyttsx3""" """运动次数语音播报器:预生成 0~200 音频文件,运行时直接播放"""
def __init__(
self,
*,
enabled: bool = True,
rate: int = 185,
volume: float = 1.0,
max_count: int = 200,
cache_dir: str | Path = "runtime/tts_cache/reps",
) -> None:
self.enabled = enabled self.enabled = enabled
self.rate = rate self.rate = rate
self.volume = volume self.volume = volume
self._queue: queue.Queue[str | None] = queue.Queue() self.max_count = max_count
self.cache_dir = Path(cache_dir)
self._queue: queue.Queue[int | None] = queue.Queue()
self._thread: threading.Thread | None = None self._thread: threading.Thread | None = None
self._engine: Any | None = None self._engine: Any | None = None
self._use_macos_say = False
self._current_process: subprocess.Popen | None = None self._current_process: subprocess.Popen | None = None
self._closed = False
self._use_macos_say = sys.platform == "darwin"
self._use_windows_winsound = sys.platform.startswith("win")
if self.enabled: if self.enabled:
self._start() self._start()
def announce_count(self, count: int) -> None: def announce_count(self, count: int) -> None:
"""将次数放入队列进行异步语音播报""" """将次数放入队列,后台线程播放对应音频"""
if not self.enabled or count <= 0: if not self.enabled or self._closed:
return return
while True:
try: if count <= 0 or count > self.max_count:
self._queue.get_nowait() return
except queue.Empty:
break # 保留“只播最新一次”的策略,避免语音堆积
self._queue.put(str(count)) self._clear_pending_counts()
self._queue.put(count)
def close(self) -> None: def close(self) -> None:
"""停止播报线程并释放资源""" """停止播报线程并释放资源"""
if not self.enabled: if not self.enabled or self._closed:
return return
self._closed = True
self._queue.put(None) self._queue.put(None)
if self._thread is not None: if self._thread is not None:
self._thread.join(timeout=1.0) self._thread.join(timeout=1.0)
if self._current_process is not None and self._current_process.poll() is None:
self._current_process.terminate() self._stop_current_playback()
logger.info("Rep announcer closed")
def _start(self) -> None: def _start(self) -> None:
"""根据平台初始化TTS引擎并启动后台播报线程""" """初始化并预生成语音缓存"""
if sys.platform == "darwin": self.cache_dir.mkdir(parents=True, exist_ok=True)
self._use_macos_say = True
logger.info("Rep announcer initialized with macOS say") try:
self._prepare_audio_cache()
except Exception as exc:
self.enabled = False
logger.warning("Rep announcer disabled, failed to prepare audio cache: {}", exc)
return
self._thread = threading.Thread(
target=self._run,
name="RepAnnouncer",
daemon=True,
)
self._thread.start()
logger.info(
"Rep announcer initialized with audio cache, platform={}, max_count={}, cache_dir={}",
sys.platform,
self.max_count,
self.cache_dir,
)
def _prepare_audio_cache(self) -> None:
"""生成 0~max_count 的语音文件,只生成缺失文件"""
if self._use_macos_say:
self._prepare_macos_say_cache()
else: else:
self._prepare_pyttsx3_cache()
def _prepare_macos_say_cache(self) -> None:
    """macOS: pre-generate one audio file per count (0..max_count) with ``say``.

    Counts whose cache file already exists are skipped.

    Raises:
        RuntimeError: If the ``say`` command is not on PATH.
        subprocess.CalledProcessError: If ``say`` exits with a non-zero status.
    """
    say_available = shutil.which("say") is not None
    if not say_available:
        raise RuntimeError("macOS say command not found")

    for number in range(0, self.max_count + 1):
        target = self._audio_path(number)
        if not target.exists():
            command = ["say", "-r", str(self.rate), "-o", str(target), str(number)]
            subprocess.run(
                command,
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL,
                check=True,
            )

    logger.info("macOS say audio cache prepared")
def _prepare_pyttsx3_cache(self) -> None:
"""非 macOS: 使用 pyttsx3 预生成 wav 文件"""
try: try:
import pyttsx3 import pyttsx3
except Exception as exc:
raise RuntimeError(f"pyttsx3 unavailable: {exc}") from exc
self._engine = pyttsx3.init() self._engine = pyttsx3.init()
self._engine.setProperty("rate", self.rate) self._engine.setProperty("rate", self.rate)
self._engine.setProperty("volume", self.volume) self._engine.setProperty("volume", self.volume)
logger.info("Rep announcer initialized with pyttsx3")
except Exception as exc:
self.enabled = False
logger.warning("Rep announcer disabled, pyttsx3 unavailable: {}", exc)
return
self._thread = threading.Thread(target=self._run, name="RepAnnouncer", daemon=True) need_generate = False
self._thread.start()
for count in range(0, self.max_count + 1):
if not self._audio_path(count).exists():
need_generate = True
self._engine.save_to_file(str(count), str(self._audio_path(count)))
if need_generate:
self._engine.runAndWait()
logger.info("pyttsx3 audio cache prepared")
def _run(self) -> None: def _run(self) -> None:
"""后台线程:从队列读取文本并调用TTS播放""" """后台线程:只负责播放已经生成好的音频文件"""
while True: while True:
text = self._queue.get() count = self._queue.get()
if text is None:
if count is None:
return return
try: try:
audio_file = self._audio_path(count)
if not audio_file.exists():
logger.warning("Rep audio file missing: {}", audio_file)
continue
self._play_audio(audio_file)
except Exception as exc:
logger.warning("Failed to play rep count {}: {}", count, exc)
def _play_audio(self, audio_file: Path) -> None:
"""根据平台播放音频"""
self._stop_current_playback()
if self._use_macos_say: if self._use_macos_say:
if self._current_process is not None and self._current_process.poll() is None:
self._current_process.terminate()
self._current_process = subprocess.Popen( self._current_process = subprocess.Popen(
["say", "-r", str(self.rate), text], ["afplay", str(audio_file)],
stdout=subprocess.DEVNULL, stdout=subprocess.DEVNULL,
stderr=subprocess.DEVNULL, stderr=subprocess.DEVNULL,
) )
elif self._engine is not None: return
self._engine.say(text)
self._engine.runAndWait() if self._use_windows_winsound:
except Exception as exc: import winsound
logger.warning("Failed to announce rep count {}: {}", text, exc)
# SND_ASYNC 表示异步播放;PURGE 会被 _stop_current_playback 调用中断
winsound.PlaySound(str(audio_file), winsound.SND_FILENAME | winsound.SND_ASYNC)
return
# Linux:优先 paplay,其次 aplay
player = shutil.which("paplay") or shutil.which("aplay")
if player is None:
logger.warning("No audio player found, expected paplay or aplay")
return
self._current_process = subprocess.Popen(
[player, str(audio_file)],
stdout=subprocess.DEVNULL,
stderr=subprocess.DEVNULL,
)
def _stop_current_playback(self) -> None:
"""中断当前正在播放的声音"""
if self._use_windows_winsound:
try:
import winsound
winsound.PlaySound(None, winsound.SND_PURGE)
except Exception:
pass
return
if self._current_process is not None and self._current_process.poll() is None:
self._current_process.terminate()
self._current_process = None
def _audio_path(self, count: int) -> Path:
"""获取某个次数对应的音频文件路径"""
suffix = ".aiff" if self._use_macos_say else ".wav"
return self.cache_dir / f"{count}{suffix}"
def _clear_pending_counts(self) -> None:
"""清空队列中等待播放的次数,避免语音堆积"""
while True:
try:
item = self._queue.get_nowait()
if item is None:
# close 信号不要吞掉
self._queue.put(None)
return
except queue.Empty:
return
+2
View File
@@ -22,6 +22,8 @@ audio:
rep_announcer_enabled: true rep_announcer_enabled: true
rep_announcer_rate: 185 rep_announcer_rate: 185
rep_announcer_volume: 1.0 rep_announcer_volume: 1.0
rep_max_count: 200 # 预生成语音文件的最大次数
rep_audio_dir: "" # 空 = 自动使用 app/audio/reps
logging: logging:
dir: logs dir: logs
+9
View File
@@ -46,6 +46,15 @@ class AudioConfig:
rep_announcer_enabled: bool = True rep_announcer_enabled: bool = True
rep_announcer_rate: int = 185 rep_announcer_rate: int = 185
rep_announcer_volume: float = 1.0 rep_announcer_volume: float = 1.0
rep_max_count: int = 200
rep_audio_dir: str = ""
@property
def resolved_audio_dir(self) -> Path:
    """Return the absolute directory that holds the rep audio files.

    When ``rep_audio_dir`` is set, it is used after expanding a leading ``~``;
    a relative value is anchored to the current working directory so the
    result is always absolute. When it is empty, the default
    ``app/audio/reps`` directory two levels above this file is returned.
    """
    if self.rep_audio_dir:
        configured = Path(self.rep_audio_dir).expanduser()
        if configured.is_absolute():
            return configured
        # A relative setting would otherwise leak out as a relative path,
        # contradicting the "absolute path" contract of this property.
        return Path.cwd() / configured
    return Path(__file__).resolve().parent.parent / "app" / "audio" / "reps"
@dataclass @dataclass
+3 -3
View File
@@ -1,8 +1,8 @@
aiortc>=1.9.0 aiortc>=1.9.0
websockets>=13.0 websockets>=13.0
opencv-contrib-python>=4.13.0.92 opencv-contrib-python>=4.10.0
numpy>=2.4.6 numpy>=1.26,<2
loguru>=0.7.0 loguru>=0.7.0
mediapipe==0.10.35 mediapipe==0.10.21
pyttsx3>=2.99 pyttsx3>=2.99
pyyaml>=6.0 pyyaml>=6.0