perf(video): 优化视频处理性能监控和音频播放

- 添加视频处理性能计时和统计功能
- 实现帧处理时间监控和慢帧警告
- 添加音频文件静音修剪功能
- 优化 Windows 平台音频播放实现
- 调整默认日志输出频率，减少冗余信息
- 修复 MediaPipe GPU 委托在 Windows 上的兼容性问题
2026-06-15 23:13:36 +08:00
parent 6dee2a2ff3
commit 08b6543b79
8 changed files with 387 additions and 48 deletions
@@ -4,6 +4,7 @@ from __future__ import annotations
 import platform
 import shutil
 import subprocess
+import wave
 from pathlib import Path

 from loguru import logger
@@ -15,6 +16,9 @@ def generate_rep_audio_files(
    rate: int,
    output_dir: Path,
    overwrite: bool = False,
+    trim_leading_silence: bool = True,
+    trim_silence_threshold: int = 500,
+    trim_silence_padding_ms: int = 20,
 ) -> None:
    """
    确保 0~max_count 的运动次数语音 wav 文件存在。
@@ -40,29 +44,37 @@ def generate_rep_audio_files(

    if not missing_counts:
        logger.info("Rep audio files already prepared: {}", output_dir)
-        return
-
-    logger.info(
-        "Preparing rep audio files, system={}, count={}, output_dir={}",
-        system,
-        len(missing_counts),
-        output_dir,
-    )
-
-    if system == "darwin":
-        _generate_with_macos_say(
-            counts=missing_counts,
-            output_dir=output_dir,
-            rate=rate,
-        )
    else:
-        _generate_with_pyttsx3(
-            counts=missing_counts,
-            output_dir=output_dir,
-            rate=rate,
+        logger.info(
+            "Preparing rep audio files, system={}, count={}, output_dir={}",
+            system,
+            len(missing_counts),
+            output_dir,
        )

-    logger.info("Rep audio files prepared: {}", output_dir)
+        if system == "darwin":
+            _generate_with_macos_say(
+                counts=missing_counts,
+                output_dir=output_dir,
+                rate=rate,
+            )
+        else:
+            _generate_with_pyttsx3(
+                counts=missing_counts,
+                output_dir=output_dir,
+                rate=rate,
+            )
+
+        logger.info("Rep audio files prepared: {}", output_dir)
+
+    if trim_leading_silence and suffix == ".wav":
+        _trim_leading_silence_files(
+            counts=list(range(0, max_count + 1)),
+            output_dir=output_dir,
+            suffix=suffix,
+            threshold=trim_silence_threshold,
+            padding_ms=trim_silence_padding_ms,
+        )


 def _generate_with_macos_say(
@@ -127,3 +139,89 @@ def _generate_with_pyttsx3(

 def _audio_path(output_dir: Path, count: int, *, suffix: str) -> Path:
     """Return the path of the audio clip for ``count`` inside ``output_dir``.

     The file name is the decimal count followed directly by ``suffix``
     (the suffix is used as given, so it must carry its own leading dot).
     """
     file_name = f"{count}{suffix}"
     return output_dir / file_name
+
+
+def _trim_leading_silence_files(
+    *,
+    counts: list[int],
+    output_dir: Path,
+    suffix: str,
+    threshold: int,
+    padding_ms: int,
+) -> None:
+    """Trim leading silence from every existing rep audio file, in place.
+
+    Args:
+        counts: Rep counts whose audio files should be processed.
+        output_dir: Directory that holds the audio files.
+        suffix: File suffix of the audio files (including the dot).
+        threshold: Loudness level above which a chunk counts as sound.
+        padding_ms: Milliseconds of leading silence to keep in each file.
+
+    Files that do not exist are skipped. A file that cannot be read or
+    rewritten as WAV is skipped with a warning, so that one bad clip does
+    not abort this optional post-processing pass for the remaining clips.
+    A one-line summary is logged when the pass finishes.
+    """
+    trimmed = 0
+    total_removed_ms = 0.0
+
+    for count in counts:
+        audio_file = _audio_path(output_dir, count, suffix=suffix)
+        if not audio_file.exists():
+            continue
+        try:
+            removed_ms = _trim_leading_silence(audio_file, threshold=threshold, padding_ms=padding_ms)
+        except (wave.Error, EOFError, OSError) as exc:
+            # wave.open raises wave.Error for non-WAV content and EOFError
+            # for empty/truncated files; OSError covers filesystem failures.
+            logger.warning("Skipping leading silence trim for {}: {}", audio_file, exc)
+            continue
+        if removed_ms > 0:
+            trimmed += 1
+            total_removed_ms += removed_ms
+
+    logger.info(
+        "Rep audio leading silence trim complete: files_trimmed={}, total_removed_ms={:.1f}, threshold={}, padding_ms={}",
+        trimmed,
+        total_removed_ms,
+        threshold,
+        padding_ms,
+    )
+
+
+def _trim_leading_silence(audio_file: Path, *, threshold: int, padding_ms: int) -> float:
+    """Remove leading silence from a WAV file in place.
+
+    The audio is scanned in chunks of roughly 10 ms. Every chunk whose
+    loudness (as reported by ``_pcm_rms``) does not exceed ``threshold``
+    counts as silence, up to the first chunk that does exceed it.
+    ``padding_ms`` milliseconds of that silence are kept in front of the
+    first sound; the rest is cut and the file is rewritten with the same
+    WAV parameters.
+
+    Args:
+        audio_file: Path of the WAV file to trim.
+        threshold: Loudness level above which a chunk counts as sound.
+        padding_ms: Milliseconds of leading silence to keep; negative
+            values are treated as 0.
+
+    Returns:
+        The number of milliseconds removed, or ``0.0`` when the file was
+        left untouched.
+
+    Raises:
+        wave.Error, EOFError, OSError: If the file cannot be read or
+            written as WAV.
+    """
+    with wave.open(str(audio_file), "rb") as reader:
+        params = reader.getparams()
+        frames = reader.readframes(params.nframes)
+
+    frame_size = params.sampwidth * params.nchannels
+    if params.nframes <= 0 or frame_size <= 0 or params.framerate <= 0:
+        return 0.0
+
+    # Scan in ~10 ms chunks (at least one frame per chunk).
+    chunk_size = max(1, params.framerate // 100) * frame_size
+    leading_frames = 0
+    sound_found = False
+
+    for offset in range(0, len(frames), chunk_size):
+        chunk = frames[offset : offset + chunk_size]
+        if _pcm_rms(chunk, params.sampwidth) > threshold:
+            sound_found = True
+            break
+        leading_frames += len(chunk) // frame_size
+
+    if not sound_found:
+        # Nothing in the file exceeds the threshold. Treating the whole
+        # clip as "leading" silence would overwrite it with only its last
+        # few milliseconds, so leave such files untouched.
+        return 0.0
+
+    padding_frames = int(params.framerate * max(0, padding_ms) / 1000)
+    remove_frames = max(0, leading_frames - padding_frames)
+    if remove_frames <= 0:
+        return 0.0
+
+    # The cut always ends at or before the first loud chunk, so the
+    # remaining audio is never empty.
+    trimmed_frames = frames[remove_frames * frame_size :]
+
+    with wave.open(str(audio_file), "wb") as writer:
+        # The stale frame count carried in params is corrected by the
+        # writer when the file is closed.
+        writer.setparams(params)
+        writer.writeframes(trimmed_frames)
+
+    return remove_frames / params.framerate * 1000
+
+
+def _pcm_rms(chunk: bytes, sample_width: int) -> float:
+    """Return the RMS loudness of a PCM chunk, expressed in 16-bit sample units.
+
+    Args:
+        chunk: Raw little-endian PCM bytes as stored in a WAV file. Samples
+            of all channels are treated alike; trailing bytes that do not
+            form a complete sample are ignored.
+        sample_width: Bytes per sample. Width 1 is read as unsigned 8-bit
+            audio centred on 128; wider samples are read as signed integers.
+
+    Returns:
+        The root-mean-square amplitude scaled to the 16-bit range, so the
+        same threshold is meaningful for every sample width. For 16-bit
+        audio this is the plain RMS of the samples. Returns ``0.0`` when the
+        chunk holds no complete sample or ``sample_width`` is not positive.
+    """
+    if not chunk or sample_width < 1:
+        return 0.0
+
+    sample_count = len(chunk) // sample_width
+    if sample_count == 0:
+        return 0.0
+
+    if sample_width == 1:
+        # 8-bit WAV data is unsigned with the zero level at 128.
+        total = sum((byte - 128) ** 2 for byte in chunk)
+    else:
+        usable = sample_count * sample_width
+        total = sum(
+            int.from_bytes(chunk[i : i + sample_width], "little", signed=True) ** 2
+            for i in range(0, usable, sample_width)
+        )
+
+    rms = (total / sample_count) ** 0.5
+    # Rescale to 16-bit units: x256 for 8-bit, x1 for 16-bit, /256 for 24-bit, ...
+    return rms * 2.0 ** (16 - 8 * sample_width)
@@ -5,6 +5,7 @@ import shutil
 import subprocess
 import sys
 import threading
+import time
 from pathlib import Path

 from loguru import logger
@@ -24,12 +25,14 @@ class RepAnnouncer:
        self.max_count = max_count
        self.audio_dir = Path(audio_dir)

-        self._queue: queue.Queue[int | None] = queue.Queue()
+        self._queue: queue.Queue[tuple[int, float] | None] = queue.Queue()
        self._thread: threading.Thread | None = None
        self._current_process: subprocess.Popen | None = None
        self._closed = False
+        self._play_lock = threading.Lock()

        self._platform = sys.platform
+        self._direct_playback = self._platform.startswith("win")

        if self.enabled:
            self._start()
@@ -41,8 +44,26 @@ class RepAnnouncer:
        if count <= 0 or count > self.max_count:
            return

+        requested_at = time.perf_counter()
+        if self._direct_playback:
+            audio_file = self._audio_path(count)
+            if not audio_file.exists():
+                logger.warning("Rep audio file missing: {}", audio_file)
+                return
+            try:
+                self._play(audio_file)
+                logger.info(
+                    "Rep audio submitted immediately: count={}, submit_ms={:.1f}",
+                    count,
+                    (time.perf_counter() - requested_at) * 1000,
+                )
+            except Exception as exc:
+                logger.warning("Failed to play rep count {}: {}", count, exc)
+            return
+
        self._clear_pending_counts()
-        self._queue.put(count)
+        self._queue.put((count, requested_at))
+        logger.info("Rep audio queued: count={}", count)

    def close(self) -> None:
        """停止播报线程并释放资源"""
@@ -62,17 +83,24 @@ class RepAnnouncer:
        """启动后台播报线程"""
        self.audio_dir.mkdir(parents=True, exist_ok=True)

+        if self._direct_playback:
+            import winsound
+
+            logger.info("Rep announcer initialized in direct Windows mode, audio_dir={}", self.audio_dir)
+            return
+
        self._thread = threading.Thread(target=self._run, name="RepAnnouncer", daemon=True)
        self._thread.start()

-        logger.info("Rep announcer initialized, audio_dir={}", self.audio_dir)
+        logger.info("Rep announcer initialized in queued mode, audio_dir={}", self.audio_dir)

    def _run(self) -> None:
        """后台线程：从队列取次数，播放对应音频文件"""
        while True:
-            count = self._queue.get()
-            if count is None:
+            item = self._queue.get()
+            if item is None:
                return
+            count, requested_at = item

            audio_file = self._audio_path(count)
            if not audio_file.exists():
@@ -81,33 +109,42 @@ class RepAnnouncer:

            try:
                self._play(audio_file)
+                logger.info(
+                    "Rep audio submitted from queue: count={}, queue_ms={:.1f}",
+                    count,
+                    (time.perf_counter() - requested_at) * 1000,
+                )
            except Exception as exc:
                logger.warning("Failed to play rep count {}: {}", count, exc)

    def _play(self, audio_file: Path) -> None:
        """播放音频文件（平台自适应）"""
-        self._stop_current_playback()
+        with self._play_lock:
+            self._stop_current_playback()

-        if self._platform == "darwin":
-            self._current_process = subprocess.Popen(
-                ["afplay", str(audio_file)],
-                stdout=subprocess.DEVNULL,
-                stderr=subprocess.DEVNULL,
-            )
-        elif self._platform.startswith("win"):
-            import winsound
+            if self._platform == "darwin":
+                self._current_process = subprocess.Popen(
+                    ["afplay", str(audio_file)],
+                    stdout=subprocess.DEVNULL,
+                    stderr=subprocess.DEVNULL,
+                )
+            elif self._platform.startswith("win"):
+                import winsound

-            winsound.PlaySound(str(audio_file), winsound.SND_FILENAME | winsound.SND_ASYNC)
-        else:
-            player = shutil.which("paplay") or shutil.which("aplay")
-            if player is None:
-                logger.warning("No audio player found")
-                return
-            self._current_process = subprocess.Popen(
-                [player, str(audio_file)],
-                stdout=subprocess.DEVNULL,
-                stderr=subprocess.DEVNULL,
-            )
+                winsound.PlaySound(
+                    str(audio_file),
+                    winsound.SND_FILENAME | winsound.SND_ASYNC | winsound.SND_NODEFAULT,
+                )
+            else:
+                player = shutil.which("paplay") or shutil.which("aplay")
+                if player is None:
+                    logger.warning("No audio player found")
+                    return
+                self._current_process = subprocess.Popen(
+                    [player, str(audio_file)],
+                    stdout=subprocess.DEVNULL,
+                    stderr=subprocess.DEVNULL,
+                )

    def _stop_current_playback(self) -> None:
        """中断当前正在播放的声音"""