Refactor into modular app structure

Split monolithic files into focused modules: - app/core: settings, logging, lifecycle - app/signaling: websocket server, ICE parser, message models - app/webrtc: peer session, video receiver, frame source - app/vision: pose landmarker wrapper, model config, pose types - app/exercises/dead_bug: detector, metrics, rules, state machine, types - app/rendering: skeleton renderer, status overlay, window display - app/audio: rep announcer - app/diagnostics: perf timer, crash handler - configs: environment-based settings - tests: unit tests for rules, state machine, ICE parser - run.py: entry point
2026-06-10 10:14:43 +08:00
parent 8b878cb9e5
commit 4485cbf702
44 changed files with 1230 additions and 648 deletions
@@ -0,0 +1,14 @@
+from __future__ import annotations
+
+import numpy as np
+from loguru import logger
+
+
+TARGET_WIDTH = 1280
+TARGET_HEIGHT = 720
+
+
+def validate_frame_size(image: np.ndarray, width: int = TARGET_WIDTH, height: int = TARGET_HEIGHT) -> None:
+    h, w = image.shape[:2]
+    if w != width or h != height:
+        logger.warning("Unexpected frame size: {}x{}", w, h)
@@ -0,0 +1,70 @@
+from __future__ import annotations
+
+import asyncio
+import json
+
+import websockets
+from aiortc import RTCPeerConnection, RTCSessionDescription
+from loguru import logger
+
+from app.signaling.ice_parser import parse_ice
+from app.webrtc.video_receiver import VideoReceiver
+
+
+class PeerSession:
+    def __init__(self) -> None:
+        self._pc = RTCPeerConnection()
+        self._video_task: asyncio.Task | None = None
+
+    async def handle(self, websocket) -> None:
+        self._setup_events()
+
+        try:
+            async for message in websocket:
+                data = json.loads(message)
+                msg_type = data.get("type")
+
+                if msg_type == "offer":
+                    offer = RTCSessionDescription(sdp=data["sdp"], type="offer")
+                    await self._pc.setRemoteDescription(offer)
+                    answer = await self._pc.createAnswer()
+                    await self._pc.setLocalDescription(answer)
+                    await websocket.send(json.dumps({
+                        "type": "answer",
+                        "sdp": self._pc.localDescription.sdp,
+                    }))
+
+                elif msg_type == "candidate":
+                    cand = parse_ice(data)
+                    if cand:
+                        await self._pc.addIceCandidate(cand)
+
+        except websockets.ConnectionClosed:
+            pass
+        except Exception as e:
+            logger.exception(f"Error: {e}")
+        finally:
+            await self._cleanup()
+
+    def _setup_events(self) -> None:
+        @self._pc.on("track")
+        async def on_track(track):
+            logger.info(f"Track received: kind={track.kind}")
+            if track.kind == "video":
+                receiver = VideoReceiver(track)
+                self._video_task = asyncio.ensure_future(receiver.run())
+
+        @self._pc.on("iceconnectionstatechange")
+        async def on_iceconnectionstatechange():
+            logger.info(f"ICE state: {self._pc.iceConnectionState}")
+            if self._pc.iceConnectionState in ("failed", "closed", "disconnected"):
+                await self._pc.close()
+
+    async def _cleanup(self) -> None:
+        if self._video_task:
+            self._video_task.cancel()
+            try:
+                await self._video_task
+            except asyncio.CancelledError:
+                pass
+        await self._pc.close()
@@ -0,0 +1,106 @@
+from __future__ import annotations
+
+import asyncio
+import os
+
+import cv2
+from aiortc.mediastreams import MediaStreamError
+from loguru import logger
+
+from app.audio.rep_announcer import RepAnnouncer
+from app.exercises.dead_bug.detector import DeadBugDetector
+from app.rendering.window_display import close_window, is_esc_pressed, show_frame
+from configs.default import (
+    EXTENSION_CONFIRM_FRAMES,
+    MODEL_PATH,
+    PREFER_GPU,
+    PROCESS_EVERY_N_FRAMES,
+    REP_ANNOUNCER_ENABLED,
+    REP_ANNOUNCER_RATE,
+    REP_ANNOUNCER_VOLUME,
+    RESET_CONFIRM_FRAMES,
+    VISIBILITY_THRESHOLD,
+)
+
+
+def _format_pose_debug(pose_result) -> str:
+    metrics = pose_result.metrics
+    if metrics is None:
+        return "metrics=None"
+    return (
+        f"side={pose_result.side}, standard={pose_result.is_standard}, "
+        f"angles(le={metrics.left_elbow_angle:.1f}, re={metrics.right_elbow_angle:.1f}, "
+        f"lk={metrics.left_knee_angle:.1f}, rk={metrics.right_knee_angle:.1f}), "
+        f"extended(la={metrics.left_arm_extended}, ra={metrics.right_arm_extended}, "
+        f"ll={metrics.left_leg_extended}, rl={metrics.right_leg_extended})"
+    )
+
+
+class VideoReceiver:
+    def __init__(self, track) -> None:
+        self._track = track
+
+    async def run(self) -> None:
+        logger.info("Start receiving video frames, process_every_n={}", PROCESS_EVERY_N_FRAMES)
+
+        frame_count = 0
+        processed_count = 0
+        detector = DeadBugDetector(
+            model_path=MODEL_PATH,
+            visibility_threshold=VISIBILITY_THRESHOLD,
+            extension_confirm_frames=EXTENSION_CONFIRM_FRAMES,
+            reset_confirm_frames=RESET_CONFIRM_FRAMES,
+            prefer_gpu=PREFER_GPU,
+        )
+        announcer = RepAnnouncer(
+            enabled=REP_ANNOUNCER_ENABLED,
+            rate=REP_ANNOUNCER_RATE,
+            volume=REP_ANNOUNCER_VOLUME,
+        )
+        last_announced_rep = 0
+        last_pose_result = None
+        last_annotated = None
+
+        try:
+            while True:
+                frame = await self._track.recv()
+                frame_count += 1
+                raw_img = frame.to_ndarray(format="bgr24")
+                timestamp_ms = int(frame.time * 1000) if frame.time is not None else frame_count * 33
+
+                if frame_count % PROCESS_EVERY_N_FRAMES == 0 or last_pose_result is None:
+                    processed_count += 1
+                    last_annotated, last_pose_result = detector.process_frame(raw_img, timestamp_ms)
+                    if last_pose_result.rep_count > last_announced_rep:
+                        last_announced_rep = last_pose_result.rep_count
+                        announcer.announce_count(last_announced_rep)
+
+                display_img = last_annotated if last_annotated is not None else raw_img
+                show_frame(display_img)
+
+                if frame_count % 100 == 0:
+                    logger.info(
+                        "Received {} frames, processed={}, raw_shape={}, reps={}, phase={}, feedback={}, {}",
+                        frame_count,
+                        processed_count,
+                        raw_img.shape,
+                        last_pose_result.rep_count if last_pose_result is not None else 0,
+                        last_pose_result.phase.value if last_pose_result is not None else "none",
+                        " | ".join(last_pose_result.feedback) if last_pose_result is not None else "",
+                        _format_pose_debug(last_pose_result) if last_pose_result is not None else "metrics=None",
+                    )
+
+                if is_esc_pressed():
+                    logger.info("ESC pressed, closing display")
+                    break
+
+        except asyncio.CancelledError:
+            logger.info("Video receive task cancelled")
+        except MediaStreamError:
+            logger.info("Video track ended")
+        except Exception as e:
+            logger.exception(f"Video receive error: {e!r}")
+        finally:
+            announcer.close()
+            detector.close()
+            close_window()