perf(video): 优化视频处理性能监控和音频播放

- 添加视频处理性能计时和统计功能
- 实现帧处理时间监控和慢帧警告
- 添加音频文件静音修剪功能
- 优化Windows平台音频播放实现
- 调整默认日志输出频率减少冗余信息
- 修复MediaPipe GPU委托在Windows上的兼容性问题
This commit is contained in:
2026-06-15 23:13:36 +08:00
parent 6dee2a2ff3
commit 08b6543b79
8 changed files with 387 additions and 48 deletions
+118 -20
View File
@@ -4,6 +4,7 @@ from __future__ import annotations
import platform import platform
import shutil import shutil
import subprocess import subprocess
import wave
from pathlib import Path from pathlib import Path
from loguru import logger from loguru import logger
@@ -15,6 +16,9 @@ def generate_rep_audio_files(
rate: int, rate: int,
output_dir: Path, output_dir: Path,
overwrite: bool = False, overwrite: bool = False,
trim_leading_silence: bool = True,
trim_silence_threshold: int = 500,
trim_silence_padding_ms: int = 20,
) -> None: ) -> None:
""" """
确保 0~max_count 的运动次数语音 wav 文件存在。 确保 0~max_count 的运动次数语音 wav 文件存在。
@@ -40,29 +44,37 @@ def generate_rep_audio_files(
if not missing_counts: if not missing_counts:
logger.info("Rep audio files already prepared: {}", output_dir) logger.info("Rep audio files already prepared: {}", output_dir)
return
logger.info(
"Preparing rep audio files, system={}, count={}, output_dir={}",
system,
len(missing_counts),
output_dir,
)
if system == "darwin":
_generate_with_macos_say(
counts=missing_counts,
output_dir=output_dir,
rate=rate,
)
else: else:
_generate_with_pyttsx3( logger.info(
counts=missing_counts, "Preparing rep audio files, system={}, count={}, output_dir={}",
output_dir=output_dir, system,
rate=rate, len(missing_counts),
output_dir,
) )
logger.info("Rep audio files prepared: {}", output_dir) if system == "darwin":
_generate_with_macos_say(
counts=missing_counts,
output_dir=output_dir,
rate=rate,
)
else:
_generate_with_pyttsx3(
counts=missing_counts,
output_dir=output_dir,
rate=rate,
)
logger.info("Rep audio files prepared: {}", output_dir)
if trim_leading_silence and suffix == ".wav":
_trim_leading_silence_files(
counts=list(range(0, max_count + 1)),
output_dir=output_dir,
suffix=suffix,
threshold=trim_silence_threshold,
padding_ms=trim_silence_padding_ms,
)
def _generate_with_macos_say( def _generate_with_macos_say(
@@ -127,3 +139,89 @@ def _generate_with_pyttsx3(
def _audio_path(output_dir: Path, count: int, *, suffix: str) -> Path: def _audio_path(output_dir: Path, count: int, *, suffix: str) -> Path:
return output_dir / f"{count}{suffix}" return output_dir / f"{count}{suffix}"
def _trim_leading_silence_files(
*,
counts: list[int],
output_dir: Path,
suffix: str,
threshold: int,
padding_ms: int,
) -> None:
trimmed = 0
total_removed_ms = 0.0
for count in counts:
audio_file = _audio_path(output_dir, count, suffix=suffix)
if not audio_file.exists():
continue
removed_ms = _trim_leading_silence(audio_file, threshold=threshold, padding_ms=padding_ms)
if removed_ms > 0:
trimmed += 1
total_removed_ms += removed_ms
logger.info(
"Rep audio leading silence trim complete: files_trimmed={}, total_removed_ms={:.1f}, threshold={}, padding_ms={}",
trimmed,
total_removed_ms,
threshold,
padding_ms,
)
def _trim_leading_silence(audio_file: Path, *, threshold: int, padding_ms: int) -> float:
with wave.open(str(audio_file), "rb") as reader:
params = reader.getparams()
frames = reader.readframes(params.nframes)
frame_size = params.sampwidth * params.nchannels
if params.nframes <= 0 or frame_size <= 0:
return 0.0
chunk_frames = max(1, params.framerate // 100)
leading_frames = 0
offset = 0
chunk_size = chunk_frames * frame_size
while offset < len(frames):
chunk = frames[offset : offset + chunk_size]
if _pcm_rms(chunk, params.sampwidth) > threshold:
break
chunk_frame_count = len(chunk) // frame_size
leading_frames += chunk_frame_count
offset += chunk_size
padding_frames = int(params.framerate * max(0, padding_ms) / 1000)
remove_frames = max(0, leading_frames - padding_frames)
if remove_frames <= 0:
return 0.0
start = min(len(frames), remove_frames * frame_size)
trimmed_frames = frames[start:]
if not trimmed_frames:
return 0.0
with wave.open(str(audio_file), "wb") as writer:
writer.setparams(params)
writer.writeframes(trimmed_frames)
return remove_frames / params.framerate * 1000
def _pcm_rms(chunk: bytes, sample_width: int) -> float:
if not chunk:
return 0.0
if sample_width == 2:
sample_count = len(chunk) // 2
if sample_count == 0:
return 0.0
total = 0
for i in range(0, sample_count * 2, 2):
sample = int.from_bytes(chunk[i : i + 2], "little", signed=True)
total += sample * sample
return (total / sample_count) ** 0.5
peak = max(abs(byte - 128) for byte in chunk)
return float(peak)
+62 -25
View File
@@ -5,6 +5,7 @@ import shutil
import subprocess import subprocess
import sys import sys
import threading import threading
import time
from pathlib import Path from pathlib import Path
from loguru import logger from loguru import logger
@@ -24,12 +25,14 @@ class RepAnnouncer:
self.max_count = max_count self.max_count = max_count
self.audio_dir = Path(audio_dir) self.audio_dir = Path(audio_dir)
self._queue: queue.Queue[int | None] = queue.Queue() self._queue: queue.Queue[tuple[int, float] | None] = queue.Queue()
self._thread: threading.Thread | None = None self._thread: threading.Thread | None = None
self._current_process: subprocess.Popen | None = None self._current_process: subprocess.Popen | None = None
self._closed = False self._closed = False
self._play_lock = threading.Lock()
self._platform = sys.platform self._platform = sys.platform
self._direct_playback = self._platform.startswith("win")
if self.enabled: if self.enabled:
self._start() self._start()
@@ -41,8 +44,26 @@ class RepAnnouncer:
if count <= 0 or count > self.max_count: if count <= 0 or count > self.max_count:
return return
requested_at = time.perf_counter()
if self._direct_playback:
audio_file = self._audio_path(count)
if not audio_file.exists():
logger.warning("Rep audio file missing: {}", audio_file)
return
try:
self._play(audio_file)
logger.info(
"Rep audio submitted immediately: count={}, submit_ms={:.1f}",
count,
(time.perf_counter() - requested_at) * 1000,
)
except Exception as exc:
logger.warning("Failed to play rep count {}: {}", count, exc)
return
self._clear_pending_counts() self._clear_pending_counts()
self._queue.put(count) self._queue.put((count, requested_at))
logger.info("Rep audio queued: count={}", count)
def close(self) -> None: def close(self) -> None:
"""停止播报线程并释放资源""" """停止播报线程并释放资源"""
@@ -62,17 +83,24 @@ class RepAnnouncer:
"""启动后台播报线程""" """启动后台播报线程"""
self.audio_dir.mkdir(parents=True, exist_ok=True) self.audio_dir.mkdir(parents=True, exist_ok=True)
if self._direct_playback:
import winsound
logger.info("Rep announcer initialized in direct Windows mode, audio_dir={}", self.audio_dir)
return
self._thread = threading.Thread(target=self._run, name="RepAnnouncer", daemon=True) self._thread = threading.Thread(target=self._run, name="RepAnnouncer", daemon=True)
self._thread.start() self._thread.start()
logger.info("Rep announcer initialized, audio_dir={}", self.audio_dir) logger.info("Rep announcer initialized in queued mode, audio_dir={}", self.audio_dir)
def _run(self) -> None: def _run(self) -> None:
"""后台线程:从队列取次数,播放对应音频文件""" """后台线程:从队列取次数,播放对应音频文件"""
while True: while True:
count = self._queue.get() item = self._queue.get()
if count is None: if item is None:
return return
count, requested_at = item
audio_file = self._audio_path(count) audio_file = self._audio_path(count)
if not audio_file.exists(): if not audio_file.exists():
@@ -81,33 +109,42 @@ class RepAnnouncer:
try: try:
self._play(audio_file) self._play(audio_file)
logger.info(
"Rep audio submitted from queue: count={}, queue_ms={:.1f}",
count,
(time.perf_counter() - requested_at) * 1000,
)
except Exception as exc: except Exception as exc:
logger.warning("Failed to play rep count {}: {}", count, exc) logger.warning("Failed to play rep count {}: {}", count, exc)
def _play(self, audio_file: Path) -> None: def _play(self, audio_file: Path) -> None:
"""播放音频文件(平台自适应)""" """播放音频文件(平台自适应)"""
self._stop_current_playback() with self._play_lock:
self._stop_current_playback()
if self._platform == "darwin": if self._platform == "darwin":
self._current_process = subprocess.Popen( self._current_process = subprocess.Popen(
["afplay", str(audio_file)], ["afplay", str(audio_file)],
stdout=subprocess.DEVNULL, stdout=subprocess.DEVNULL,
stderr=subprocess.DEVNULL, stderr=subprocess.DEVNULL,
) )
elif self._platform.startswith("win"): elif self._platform.startswith("win"):
import winsound import winsound
winsound.PlaySound(str(audio_file), winsound.SND_FILENAME | winsound.SND_ASYNC) winsound.PlaySound(
else: str(audio_file),
player = shutil.which("paplay") or shutil.which("aplay") winsound.SND_FILENAME | winsound.SND_ASYNC | winsound.SND_NODEFAULT,
if player is None: )
logger.warning("No audio player found") else:
return player = shutil.which("paplay") or shutil.which("aplay")
self._current_process = subprocess.Popen( if player is None:
[player, str(audio_file)], logger.warning("No audio player found")
stdout=subprocess.DEVNULL, return
stderr=subprocess.DEVNULL, self._current_process = subprocess.Popen(
) [player, str(audio_file)],
stdout=subprocess.DEVNULL,
stderr=subprocess.DEVNULL,
)
def _stop_current_playback(self) -> None: def _stop_current_playback(self) -> None:
"""中断当前正在播放的声音""" """中断当前正在播放的声音"""
+3
View File
@@ -16,4 +16,7 @@ def startup() -> None:
rate=config.audio.rep_announcer_rate, rate=config.audio.rep_announcer_rate,
output_dir=config.audio.resolved_audio_dir, output_dir=config.audio.resolved_audio_dir,
overwrite=False, overwrite=False,
trim_leading_silence=config.audio.trim_leading_silence,
trim_silence_threshold=config.audio.trim_silence_threshold,
trim_silence_padding_ms=config.audio.trim_silence_padding_ms,
) )
+75
View File
@@ -52,6 +52,7 @@ class DeadBugDetector:
self._result_event = threading.Event() self._result_event = threading.Event()
self._inflight = False self._inflight = False
self._inflight_started_at = 0.0 self._inflight_started_at = 0.0
self.last_timing: dict[str, float | bool] = {}
def on_result(pose_result, _image, _timestamp_ms): def on_result(pose_result, _image, _timestamp_ms):
with self._result_lock: with self._result_lock:
@@ -79,7 +80,9 @@ class DeadBugDetector:
def process_frame(self, bgr_frame: np.ndarray, timestamp_ms: int) -> tuple[np.ndarray, DeadBugResult]: def process_frame(self, bgr_frame: np.ndarray, timestamp_ms: int) -> tuple[np.ndarray, DeadBugResult]:
"""处理单帧:姿态检测、指标计算、状态机更新、可视化叠加""" """处理单帧:姿态检测、指标计算、状态机更新、可视化叠加"""
total_started = time.perf_counter()
timestamp_ms = self._normalize_timestamp(timestamp_ms) timestamp_ms = self._normalize_timestamp(timestamp_ms)
normalize_done = time.perf_counter()
with self._result_lock: with self._result_lock:
if self._inflight and time.monotonic() - self._inflight_started_at > 0.5: if self._inflight and time.monotonic() - self._inflight_started_at > 0.5:
@@ -90,9 +93,11 @@ class DeadBugDetector:
if should_submit: if should_submit:
self._inflight = True self._inflight = True
self._inflight_started_at = time.monotonic() self._inflight_started_at = time.monotonic()
lock_done = time.perf_counter()
if should_submit: if should_submit:
rgba_frame = bgr_to_rgba(bgr_frame) rgba_frame = bgr_to_rgba(bgr_frame)
convert_done = time.perf_counter()
mp_image = mp.Image(image_format=mp.ImageFormat.SRGBA, data=rgba_frame) mp_image = mp.Image(image_format=mp.ImageFormat.SRGBA, data=rgba_frame)
self._result_event.clear() self._result_event.clear()
try: try:
@@ -102,12 +107,20 @@ class DeadBugDetector:
self._inflight = False self._inflight = False
self._inflight_started_at = 0.0 self._inflight_started_at = 0.0
raise raise
submit_done = time.perf_counter()
self._result_event.wait(timeout=0.08) self._result_event.wait(timeout=0.08)
wait_done = time.perf_counter()
else:
convert_done = lock_done
submit_done = lock_done
wait_done = lock_done
with self._result_lock: with self._result_lock:
pose_result = self._latest_result pose_result = self._latest_result
result_read_done = time.perf_counter()
annotated = bgr_frame.copy() annotated = bgr_frame.copy()
copy_done = time.perf_counter()
if pose_result is None or not pose_result.pose_landmarks: if pose_result is None or not pose_result.pose_landmarks:
self._state.mark_no_pose() self._state.mark_no_pose()
@@ -120,6 +133,18 @@ class DeadBugDetector:
metrics=None, metrics=None,
) )
draw_status_overlay(annotated, result) draw_status_overlay(annotated, result)
self._record_timing(
total_started,
normalize_done,
lock_done,
convert_done,
submit_done,
wait_done,
result_read_done,
copy_done,
time.perf_counter(),
should_submit,
)
return annotated, result return annotated, result
landmarks = [Point(lm.x, lm.y, lm.z, getattr(lm, "visibility", 1.0)) for lm in pose_result.pose_landmarks[0]] landmarks = [Point(lm.x, lm.y, lm.z, getattr(lm, "visibility", 1.0)) for lm in pose_result.pose_landmarks[0]]
@@ -136,6 +161,18 @@ class DeadBugDetector:
metrics=None, metrics=None,
) )
draw_status_overlay(annotated, result) draw_status_overlay(annotated, result)
self._record_timing(
total_started,
normalize_done,
lock_done,
convert_done,
submit_done,
wait_done,
result_read_done,
copy_done,
time.perf_counter(),
should_submit,
)
return annotated, result return annotated, result
raw = calculate_metrics( raw = calculate_metrics(
@@ -169,8 +206,46 @@ class DeadBugDetector:
result = self._state.update(metrics) result = self._state.update(metrics)
draw_status_overlay(annotated, result) draw_status_overlay(annotated, result)
self._record_timing(
total_started,
normalize_done,
lock_done,
convert_done,
submit_done,
wait_done,
result_read_done,
copy_done,
time.perf_counter(),
should_submit,
)
return annotated, result return annotated, result
def _record_timing(
self,
total_started: float,
normalize_done: float,
lock_done: float,
convert_done: float,
submit_done: float,
wait_done: float,
result_read_done: float,
copy_done: float,
finished: float,
submitted: bool,
) -> None:
self.last_timing = {
"total_ms": (finished - total_started) * 1000,
"timestamp_ms": (normalize_done - total_started) * 1000,
"lock_ms": (lock_done - normalize_done) * 1000,
"convert_ms": (convert_done - lock_done) * 1000,
"submit_ms": (submit_done - convert_done) * 1000,
"wait_ms": (wait_done - submit_done) * 1000,
"result_read_ms": (result_read_done - wait_done) * 1000,
"copy_ms": (copy_done - result_read_done) * 1000,
"postprocess_draw_ms": (finished - copy_done) * 1000,
"submitted": submitted,
}
def _normalize_timestamp(self, timestamp_ms: int) -> int: def _normalize_timestamp(self, timestamp_ms: int) -> int:
"""确保时间戳严格递增(MediaPipe要求)""" """确保时间戳严格递增(MediaPipe要求)"""
if timestamp_ms <= self._last_timestamp_ms: if timestamp_ms <= self._last_timestamp_ms:
+6
View File
@@ -1,5 +1,6 @@
from __future__ import annotations from __future__ import annotations
import platform
from typing import Callable from typing import Callable
import mediapipe as mp import mediapipe as mp
@@ -27,6 +28,11 @@ class PoseLandmarkerWrapper:
if prefer_gpu: if prefer_gpu:
try: try:
if platform.system() == "Windows":
logger.warning(
"MediaPipe GPU delegate requested, but MediaPipe Tasks Python does not support GPU delegate on Windows; "
"Intel Iris Xe cannot be used by this backend and CPU fallback is expected"
)
self.delegate = BaseOptions.Delegate.GPU self.delegate = BaseOptions.Delegate.GPU
self._landmarker = self._create(self.delegate, result_callback) self._landmarker = self._create(self.delegate, result_callback)
logger.info("MediaPipe PoseLandmarker initialized with GPU delegate") logger.info("MediaPipe PoseLandmarker initialized with GPU delegate")
+111 -1
View File
@@ -1,6 +1,7 @@
from __future__ import annotations from __future__ import annotations
import asyncio import asyncio
import time
import cv2 import cv2
from aiortc.mediastreams import MediaStreamError from aiortc.mediastreams import MediaStreamError
@@ -25,6 +26,39 @@ def _format_pose_debug(pose_result) -> str:
f"ll={metrics.left_leg_extended}, rl={metrics.right_leg_extended})" f"ll={metrics.left_leg_extended}, rl={metrics.right_leg_extended})"
) )
def _new_perf_window() -> dict:
return {
"frames": 0,
"processed": 0,
"loop_ms": 0.0,
"to_ndarray_ms": 0.0,
"detect_ms": 0.0,
"show_ms": 0.0,
"max_loop_ms": 0.0,
"max_detect_ms": 0.0,
"detector": {},
}
def _add_detector_timing(perf: dict, timing: dict[str, float | bool]) -> None:
detector = perf["detector"]
for key, value in timing.items():
if key == "submitted":
detector[key] = detector.get(key, 0) + (1 if value else 0)
continue
value = float(value)
detector[key] = detector.get(key, 0.0) + value
max_key = f"max_{key}"
detector[max_key] = max(detector.get(max_key, 0.0), value)
def _avg(perf: dict, key: str, denominator: int) -> float:
if denominator <= 0:
return 0.0
return perf.get(key, 0.0) / denominator
class VideoReceiver: class VideoReceiver:
"""视频轨道接收与运动检测流水线""" """视频轨道接收与运动检测流水线"""
@@ -34,10 +68,19 @@ class VideoReceiver:
async def run(self) -> None: async def run(self) -> None:
"""持续接收视频帧并进行姿态检测、渲染和语音播报""" """持续接收视频帧并进行姿态检测、渲染和语音播报"""
log_every_n_frames = max(1, config.video.log_every_n_frames) log_every_n_frames = max(1, config.video.log_every_n_frames)
perf_log_every_n_frames = max(1, config.video.perf_log_every_n_frames)
slow_frame_ms = max(0.0, config.video.slow_frame_ms)
logger.info( logger.info(
"Start receiving video frames, process_every_n={}, log_every_n={}", "Start receiving video frames, process_every_n={}, log_every_n={}, perf_log_every_n={}, slow_frame_ms={}",
config.video.process_every_n_frames, config.video.process_every_n_frames,
log_every_n_frames, log_every_n_frames,
perf_log_every_n_frames,
slow_frame_ms,
)
logger.info(
"OpenCV OpenCL status: have_opencl={}, use_opencl={}",
cv2.ocl.haveOpenCL(),
cv2.ocl.useOpenCL(),
) )
frame_count = 0 frame_count = 0
@@ -57,23 +100,43 @@ class VideoReceiver:
last_announced_rep = 0 last_announced_rep = 0
last_pose_result = None last_pose_result = None
last_annotated = None last_annotated = None
perf = _new_perf_window()
try: try:
while True: while True:
loop_started = time.perf_counter()
frame = await self._track.recv() frame = await self._track.recv()
frame_count += 1 frame_count += 1
recv_done = time.perf_counter()
raw_img = frame.to_ndarray(format="bgr24") raw_img = frame.to_ndarray(format="bgr24")
ndarray_done = time.perf_counter()
timestamp_ms = int(frame.time * 1000) if frame.time is not None else frame_count * 33 timestamp_ms = int(frame.time * 1000) if frame.time is not None else frame_count * 33
detect_ms = 0.0
if frame_count % config.video.process_every_n_frames == 0 or last_pose_result is None: if frame_count % config.video.process_every_n_frames == 0 or last_pose_result is None:
detect_started = time.perf_counter()
processed_count += 1 processed_count += 1
last_annotated, last_pose_result = detector.process_frame(raw_img, timestamp_ms) last_annotated, last_pose_result = detector.process_frame(raw_img, timestamp_ms)
detect_ms = (time.perf_counter() - detect_started) * 1000
perf["processed"] += 1
perf["detect_ms"] += detect_ms
perf["max_detect_ms"] = max(perf["max_detect_ms"], detect_ms)
_add_detector_timing(perf, detector.last_timing)
if last_pose_result.rep_count > last_announced_rep: if last_pose_result.rep_count > last_announced_rep:
last_announced_rep = last_pose_result.rep_count last_announced_rep = last_pose_result.rep_count
announce_started = time.perf_counter()
announcer.announce_count(last_announced_rep) announcer.announce_count(last_announced_rep)
logger.info(
"Rep completed and audio requested: count={}, frame={}, announce_call_ms={:.1f}",
last_announced_rep,
frame_count,
(time.perf_counter() - announce_started) * 1000,
)
display_img = last_annotated if last_annotated is not None else raw_img display_img = last_annotated if last_annotated is not None else raw_img
show_started = time.perf_counter()
show_frame(display_img) show_frame(display_img)
show_done = time.perf_counter()
if frame_count % log_every_n_frames == 0: if frame_count % log_every_n_frames == 0:
logger.info( logger.info(
@@ -87,6 +150,53 @@ class VideoReceiver:
_format_pose_debug(last_pose_result) if last_pose_result is not None else "metrics=None", _format_pose_debug(last_pose_result) if last_pose_result is not None else "metrics=None",
) )
loop_ms = (show_done - loop_started) * 1000
to_ndarray_ms = (ndarray_done - recv_done) * 1000
show_ms = (show_done - show_started) * 1000
perf["frames"] += 1
perf["loop_ms"] += loop_ms
perf["to_ndarray_ms"] += to_ndarray_ms
perf["show_ms"] += show_ms
perf["max_loop_ms"] = max(perf["max_loop_ms"], loop_ms)
if slow_frame_ms and loop_ms >= slow_frame_ms:
logger.warning(
"Slow video frame: frame={}, loop_ms={:.1f}, detect_ms={:.1f}, to_ndarray_ms={:.1f}, show_ms={:.1f}, shape={}",
frame_count,
loop_ms,
detect_ms,
to_ndarray_ms,
show_ms,
raw_img.shape,
)
if frame_count % perf_log_every_n_frames == 0:
frames = perf["frames"]
processed = perf["processed"]
detector_perf = perf["detector"]
logger.info(
"Perf window: frames={}, processed={}, avg_loop_ms={:.1f}, max_loop_ms={:.1f}, avg_to_ndarray_ms={:.1f}, "
"avg_detect_ms={:.1f}, max_detect_ms={:.1f}, avg_show_ms={:.1f}, detector_avg_total_ms={:.1f}, "
"detector_max_total_ms={:.1f}, detector_avg_wait_ms={:.1f}, detector_max_wait_ms={:.1f}, "
"detector_avg_convert_ms={:.1f}, detector_avg_postprocess_draw_ms={:.1f}, detector_submitted={}",
frames,
processed,
_avg(perf, "loop_ms", frames),
perf["max_loop_ms"],
_avg(perf, "to_ndarray_ms", frames),
_avg(perf, "detect_ms", processed),
perf["max_detect_ms"],
_avg(perf, "show_ms", frames),
_avg(detector_perf, "total_ms", processed),
detector_perf.get("max_total_ms", 0.0),
_avg(detector_perf, "wait_ms", processed),
detector_perf.get("max_wait_ms", 0.0),
_avg(detector_perf, "convert_ms", processed),
_avg(detector_perf, "postprocess_draw_ms", processed),
detector_perf.get("submitted", 0),
)
perf = _new_perf_window()
if is_esc_pressed(): if is_esc_pressed():
logger.info("ESC pressed, closing display") logger.info("ESC pressed, closing display")
break break
+6 -1
View File
@@ -8,7 +8,9 @@ server:
video: video:
process_every_n_frames: 2 process_every_n_frames: 2
log_every_n_frames: 2 log_every_n_frames: 30
perf_log_every_n_frames: 30
slow_frame_ms: 100
model: model:
path: "./pose_models/pose_landmarker_full.task" path: "./pose_models/pose_landmarker_full.task"
@@ -25,6 +27,9 @@ audio:
rep_announcer_volume: 1.0 rep_announcer_volume: 1.0
rep_max_count: 200 # 预生成语音文件的最大次数 rep_max_count: 200 # 预生成语音文件的最大次数
rep_audio_dir: "" # 空 = 自动使用 app/audio/reps rep_audio_dir: "" # 空 = 自动使用 app/audio/reps
trim_leading_silence: true
trim_silence_threshold: 500
trim_silence_padding_ms: 20
logging: logging:
dir: logs dir: logs
+6 -1
View File
@@ -16,7 +16,9 @@ class ServerConfig:
class VideoConfig: class VideoConfig:
"""视频帧处理配置""" """视频帧处理配置"""
process_every_n_frames: int = 2 process_every_n_frames: int = 2
log_every_n_frames: int = 2 log_every_n_frames: int = 30
perf_log_every_n_frames: int = 30
slow_frame_ms: float = 100.0
@dataclass @dataclass
@@ -49,6 +51,9 @@ class AudioConfig:
rep_announcer_volume: float = 1.0 rep_announcer_volume: float = 1.0
rep_max_count: int = 200 rep_max_count: int = 200
rep_audio_dir: str = "" rep_audio_dir: str = ""
trim_leading_silence: bool = True
trim_silence_threshold: int = 500
trim_silence_padding_ms: int = 20
@property @property
def resolved_audio_dir(self) -> Path: def resolved_audio_dir(self) -> Path: