1

2024-12-02 00:26:05 +08:00
parent f1836172d6
commit 5e492e4d8f
2 changed files with 3 additions and 19 deletions
--- a/src/engine/dizhu/dizhu_engine.py
+++ b/src/engine/dizhu/dizhu_engine.py
@@ -43,8 +43,6 @@ class DiZhuEngine:
    def get_current_player(self):
        """获取当前玩家对象"""
        current_player = self.players[self.current_player_index]
-        # logger.info(f"当前玩家: 玩家 {self.current_player_index + 1} ({current_player.role})")
-        # logger.info(f"当前玩家手牌: {current_player.get_hand_cards_as_strings()}")
        return current_player

    def step(self, action):
@@ -55,7 +53,6 @@ class DiZhuEngine:
        current_player = self.get_current_player()

        if action == "pass":
-            # logger.info(f"玩家 {self.current_player_index + 1} 选择过牌")
            self.pass_count += 1

            # 如果所有其他玩家都过牌，允许最后出牌玩家再次出牌
@@ -63,18 +60,15 @@ class DiZhuEngine:
                self.current_player_index = self.last_player
                self.pass_count = 0  # 重置过牌计数
                self.current_pile = None  # 清空当前牌面
-                # logger.info(f"所有玩家过牌，玩家 {self.current_player_index + 1} 可以继续出牌")
        else:
            # 出牌逻辑
            if not isinstance(action, list):
                action = [action]

            if not all(card in current_player.hand_cards for card in action):
-                # logger.error(f"玩家 {self.current_player_index + 1} 的动作非法: {action}")
                raise ValueError(f"玩家手牌不足以完成此次出牌: {action}")

            if self.current_pile and not self._can_beat(self.current_pile, action):
-                # logger.error(f"玩家 {self.current_player_index + 1} 出牌非法: {action}")
                raise ValueError(f"出牌无法打过当前牌面: {action}")

            # 出牌成功
@@ -82,7 +76,6 @@ class DiZhuEngine:
            self.pass_count = 0  # 出牌后重置过牌计数
            self.last_player = self.current_player_index  # 更新最后出牌的玩家

-            # logger.info(f"玩家 {self.current_player_index + 1} 出牌: {[card_to_string(card) for card in action]}")

            # 从手牌中移除
            for card in action:
@@ -97,7 +90,6 @@ class DiZhuEngine:

        # 切换到下一个玩家
        self.current_player_index = (self.current_player_index + 1) % 3
-        logger.info(f"切换到玩家 {self.current_player_index + 1}")


    def get_action_space(self):
@@ -172,11 +164,6 @@ class DiZhuEngine:
            "game_over": self.game_over,
        }
        logger.info("当前游戏状态: ")
-        # logger.info(f"地主牌: {[card_to_string(card) for card in self.landlord_cards]}")
-        # for i, player in enumerate(self.players):
-            # logger.info(f"玩家 {i + 1} ({player.role}) 手牌: {player.get_hand_cards_as_strings()}")
-            # logger.info(f"玩家 {i + 1} 出牌历史: {[card_to_string(card) for play in player.history for card in play]}")
-        # logger.info(f"当前玩家索引: {self.current_player_index}")
        logger.info(f"游戏是否结束: {self.game_over}")
        return state

--- a/src/environment/dizhu_env.py
+++ b/src/environment/dizhu_env.py
@@ -30,16 +30,15 @@ class DouDiZhuEnv(gym.Env):
        try:
            reward = 0  # 初始化奖励
            current_player = self.engine.get_current_player()
-            # logger.info(f"当前玩家: 玩家 {self.engine.current_player_index + 1} 手牌: {current_player.get_hand_cards_as_strings()}")
+

            if action == 0:  # 过牌
-                # logger.info(f"玩家 {self.engine.current_player_index + 1} 选择过牌")
                self.engine.step("pass")
                reward -= 0.5  # 对频繁过牌给予轻微惩罚
            else:
                # 玩家选择出牌
                action_cards = self._decode_action(action)  # 解码动作为具体的牌型
-                # logger.info(f"玩家 {self.engine.current_player_index + 1} 选择出牌: {action_cards}")
+

                # 出牌前的手牌数量
                previous_hand_count = len(current_player.hand_cards)
@@ -57,13 +56,11 @@ class DouDiZhuEnv(gym.Env):
            done = self.engine.game_over
            if done:
                reward += 10  # 胜利时给予较大的奖励
-                # logger.info(f"游戏结束！胜利玩家: {self.engine.current_player_index + 1}")
+                logger.info(f"游戏结束！胜利玩家: {self.engine.current_player_index + 1}")

            return self._get_observation(), reward, done, {}

        except ValueError as e:
-            # 对无效动作设置较大的负奖励
-            # logger.error(f"无效动作: {e}")
            return self._get_observation(), -5, False, {"error": str(e)}

    def _get_observation(self):