perf(video): 优化视频处理性能监控和音频播放

- 添加视频处理性能计时和统计功能
- 实现帧处理时间监控和慢帧警告
- 添加音频文件静音修剪功能
- 优化Windows平台音频播放实现
- 调整默认日志输出频率减少冗余信息
- 修复MediaPipe GPU委托在Windows上的兼容性问题
This commit is contained in:
2026-06-15 23:13:36 +08:00
parent 6dee2a2ff3
commit 08b6543b79
8 changed files with 387 additions and 48 deletions
+118 -20
View File
@@ -4,6 +4,7 @@ from __future__ import annotations
import platform
import shutil
import subprocess
import wave
from pathlib import Path
from loguru import logger
@@ -15,6 +16,9 @@ def generate_rep_audio_files(
rate: int,
output_dir: Path,
overwrite: bool = False,
trim_leading_silence: bool = True,
trim_silence_threshold: int = 500,
trim_silence_padding_ms: int = 20,
) -> None:
"""
确保 0~max_count 的运动次数语音 wav 文件存在。
@@ -40,29 +44,37 @@ def generate_rep_audio_files(
if not missing_counts:
logger.info("Rep audio files already prepared: {}", output_dir)
return
logger.info(
"Preparing rep audio files, system={}, count={}, output_dir={}",
system,
len(missing_counts),
output_dir,
)
if system == "darwin":
_generate_with_macos_say(
counts=missing_counts,
output_dir=output_dir,
rate=rate,
)
else:
_generate_with_pyttsx3(
counts=missing_counts,
output_dir=output_dir,
rate=rate,
logger.info(
"Preparing rep audio files, system={}, count={}, output_dir={}",
system,
len(missing_counts),
output_dir,
)
logger.info("Rep audio files prepared: {}", output_dir)
if system == "darwin":
_generate_with_macos_say(
counts=missing_counts,
output_dir=output_dir,
rate=rate,
)
else:
_generate_with_pyttsx3(
counts=missing_counts,
output_dir=output_dir,
rate=rate,
)
logger.info("Rep audio files prepared: {}", output_dir)
if trim_leading_silence and suffix == ".wav":
_trim_leading_silence_files(
counts=list(range(0, max_count + 1)),
output_dir=output_dir,
suffix=suffix,
threshold=trim_silence_threshold,
padding_ms=trim_silence_padding_ms,
)
def _generate_with_macos_say(
@@ -127,3 +139,89 @@ def _generate_with_pyttsx3(
def _audio_path(output_dir: Path, count: int, *, suffix: str) -> Path:
    """Build the path of the audio clip for a given repetition count.

    The file name is the count followed directly by ``suffix`` (for example
    ``3`` + ``.wav``), placed inside ``output_dir``.
    """
    file_name = f"{count}{suffix}"
    return output_dir.joinpath(file_name)
def _trim_leading_silence_files(
    *,
    counts: list[int],
    output_dir: Path,
    suffix: str,
    threshold: int,
    padding_ms: int,
) -> None:
    """Trim leading silence from every existing clip for the given counts.

    For each count the clip path is resolved with ``_audio_path``; clips that
    do not exist on disk are skipped. Every remaining clip is passed to
    ``_trim_leading_silence``. A single summary line is logged at the end with
    the number of files that were shortened and the total milliseconds removed.
    """
    # Lazy generator: each path is resolved right before it is processed.
    clip_paths = (_audio_path(output_dir, number, suffix=suffix) for number in counts)

    files_trimmed = 0
    removed_total_ms = 0.0
    for clip_path in clip_paths:
        if clip_path.exists():
            cut_ms = _trim_leading_silence(
                clip_path,
                threshold=threshold,
                padding_ms=padding_ms,
            )
            # Only clips that actually lost audio count towards the summary.
            if cut_ms > 0:
                files_trimmed += 1
                removed_total_ms += cut_ms

    logger.info(
        "Rep audio leading silence trim complete: files_trimmed={}, total_removed_ms={:.1f}, threshold={}, padding_ms={}",
        files_trimmed,
        removed_total_ms,
        threshold,
        padding_ms,
    )
def _trim_leading_silence(audio_file: Path, *, threshold: int, padding_ms: int) -> float:
    """Remove leading silence from a WAV file, rewriting it in place.

    The audio is scanned from the start in chunks of roughly 10 ms
    (``framerate // 100`` frames). Scanning stops at the first chunk whose
    level, as reported by ``_pcm_rms``, is strictly greater than ``threshold``.
    Everything before that chunk, except the last ``padding_ms`` milliseconds,
    is dropped and the file is rewritten with its original parameters.

    A file in which no chunk exceeds the threshold is left untouched. Without
    this guard the whole clip would be classified as leading silence and the
    file would be overwritten with only its final ``padding_ms`` of audio.

    Args:
        audio_file: Path of the WAV file to process.
        threshold: Level a chunk must exceed to count as sound.
        padding_ms: Milliseconds of silence to keep before the first sound.
            Negative values are treated as 0.

    Returns:
        Milliseconds of audio removed, or ``0.0`` when the file was not changed.
    """
    with wave.open(str(audio_file), "rb") as reader:
        params = reader.getparams()
        frames = reader.readframes(params.nframes)

    frame_size = params.sampwidth * params.nchannels
    # A non-positive frame rate would break the duration maths below.
    if params.nframes <= 0 or frame_size <= 0 or params.framerate <= 0:
        return 0.0

    chunk_frames = max(1, params.framerate // 100)
    chunk_size = chunk_frames * frame_size

    leading_frames = 0
    found_sound = False
    for offset in range(0, len(frames), chunk_size):
        chunk = frames[offset : offset + chunk_size]
        if _pcm_rms(chunk, params.sampwidth) > threshold:
            found_sound = True
            break
        leading_frames += len(chunk) // frame_size

    if not found_sound:
        # Nothing in the file is louder than the threshold: trimming would
        # discard the whole clip, so keep the file exactly as it is.
        return 0.0

    padding_frames = int(params.framerate * max(0, padding_ms) / 1000)
    remove_frames = max(0, leading_frames - padding_frames)
    if remove_frames <= 0:
        return 0.0

    # A loud chunk exists past the removed region, so this slice is non-empty.
    trimmed_frames = frames[remove_frames * frame_size :]

    with wave.open(str(audio_file), "wb") as writer:
        writer.setparams(params)
        writer.writeframes(trimmed_frames)

    return remove_frames / params.framerate * 1000
def _pcm_rms(chunk: bytes, sample_width: int) -> float:
    """Return a loudness estimate for a chunk of PCM bytes.

    * ``sample_width >= 2``: samples are decoded as signed little-endian
      integers and their root-mean-square is returned, scaled down to the
      16-bit range (divided by ``256 ** (sample_width - 2)``). For 16-bit
      input the scale factor is 1, so the plain RMS is returned. Wider
      samples were previously read byte by byte as unsigned 8-bit data, which
      produced meaningless levels.
    * any other width: each byte is treated as an unsigned 8-bit sample and
      the peak deviation from the midpoint 128 is returned. This is a peak,
      not an RMS, and lies on a 0..128 scale.

    Trailing bytes that do not form a whole sample are ignored.

    Args:
        chunk: Raw PCM bytes.
        sample_width: Bytes per sample.

    Returns:
        The level estimate, or ``0.0`` when ``chunk`` holds no complete sample.
    """
    if not chunk:
        return 0.0

    if sample_width >= 2:
        # Drop any trailing partial sample.
        usable = len(chunk) - len(chunk) % sample_width
        if usable == 0:
            return 0.0
        sample_count = usable // sample_width
        total = sum(
            int.from_bytes(chunk[i : i + sample_width], "little", signed=True) ** 2
            for i in range(0, usable, sample_width)
        )
        rms = (total / sample_count) ** 0.5
        # Normalise to the 16-bit scale so one threshold fits all widths.
        return rms / (1 << (8 * (sample_width - 2)))

    peak = max(abs(byte - 128) for byte in chunk)
    return float(peak)