From f1836172d6ce506488481de5e2309b2ad603befc Mon Sep 17 00:00:00 2001
From: wsy182 <2392948297@qq.com>
Date: Mon, 2 Dec 2024 00:20:51 +0800
Subject: [PATCH] Silence per-step game logging and raise Dou Dizhu training timesteps

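Comment out the per-step logger calls that are emitted on every
environment step:

- src/engine/dizhu/dizhu_engine.py: current-player, pass, play and
  illegal-action logs, plus the per-player hand/history lines of the
  state dump (the "game over" line is kept).
- src/environment/dizhu_env.py: current-player, pass, play, game-end and
  invalid-action logs; the error text is still returned in the info dict.
- src/environment/chengdu_mahjong_env.py: the discard log.

Also change total_timesteps in scripts/train_dizhu_model.py from 100000
to 10000000000000000.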
---
 scripts/train_dizhu_model.py           |  2 +-
 src/engine/dizhu/dizhu_engine.py       | 24 ++++++++++++------------
 src/environment/chengdu_mahjong_env.py |  2 +-
 src/environment/dizhu_env.py           | 10 +++++-----
 4 files changed, 19 insertions(+), 19 deletions(-)

diff --git a/scripts/train_dizhu_model.py b/scripts/train_dizhu_model.py
index 5cefb69..1579483 100644
--- a/scripts/train_dizhu_model.py
+++ b/scripts/train_dizhu_model.py
@@ -23,7 +23,7 @@ def train_dizhu_model():
 
     # 训练模型，设定总训练步数
     logger.info("开始训练斗地主模型...")
-    model.learn(total_timesteps=100000)  # 总训练步数
+    model.learn(total_timesteps=10000000000000000)  # 总训练步数
     logger.info("斗地主模型训练完成！")
 
     # 保存训练后的模型
diff --git a/src/engine/dizhu/dizhu_engine.py b/src/engine/dizhu/dizhu_engine.py
index 9024169..8bfd1a6 100644
--- a/src/engine/dizhu/dizhu_engine.py
+++ b/src/engine/dizhu/dizhu_engine.py
@@ -43,8 +43,8 @@ class DiZhuEngine:
     def get_current_player(self):
         """获取当前玩家对象"""
         current_player = self.players[self.current_player_index]
-        logger.info(f"当前玩家: 玩家 {self.current_player_index + 1} ({current_player.role})")
-        logger.info(f"当前玩家手牌: {current_player.get_hand_cards_as_strings()}")
+        # logger.info(f"当前玩家: 玩家 {self.current_player_index + 1} ({current_player.role})")
+        # logger.info(f"当前玩家手牌: {current_player.get_hand_cards_as_strings()}")
         return current_player
 
     def step(self, action):
@@ -55,7 +55,7 @@ class DiZhuEngine:
         current_player = self.get_current_player()
 
         if action == "pass":
-            logger.info(f"玩家 {self.current_player_index + 1} 选择过牌")
+            # logger.info(f"玩家 {self.current_player_index + 1} 选择过牌")
             self.pass_count += 1
 
             # 如果所有其他玩家都过牌，允许最后出牌玩家再次出牌
@@ -63,18 +63,18 @@ class DiZhuEngine:
                 self.current_player_index = self.last_player
                 self.pass_count = 0  # 重置过牌计数
                 self.current_pile = None  # 清空当前牌面
-                logger.info(f"所有玩家过牌，玩家 {self.current_player_index + 1} 可以继续出牌")
+                # logger.info(f"所有玩家过牌，玩家 {self.current_player_index + 1} 可以继续出牌")
         else:
             # 出牌逻辑
             if not isinstance(action, list):
                 action = [action]
 
             if not all(card in current_player.hand_cards for card in action):
-                logger.error(f"玩家 {self.current_player_index + 1} 的动作非法: {action}")
+                # logger.error(f"玩家 {self.current_player_index + 1} 的动作非法: {action}")
                 raise ValueError(f"玩家手牌不足以完成此次出牌: {action}")
 
             if self.current_pile and not self._can_beat(self.current_pile, action):
-                logger.error(f"玩家 {self.current_player_index + 1} 出牌非法: {action}")
+                # logger.error(f"玩家 {self.current_player_index + 1} 出牌非法: {action}")
                 raise ValueError(f"出牌无法打过当前牌面: {action}")
 
             # 出牌成功
@@ -82,7 +82,7 @@ class DiZhuEngine:
             self.pass_count = 0  # 出牌后重置过牌计数
             self.last_player = self.current_player_index  # 更新最后出牌的玩家
 
-            logger.info(f"玩家 {self.current_player_index + 1} 出牌: {[card_to_string(card) for card in action]}")
+            # logger.info(f"玩家 {self.current_player_index + 1} 出牌: {[card_to_string(card) for card in action]}")
 
             # 从手牌中移除
             for card in action:
@@ -172,11 +172,11 @@ class DiZhuEngine:
             "game_over": self.game_over,
         }
         logger.info("当前游戏状态: ")
-        logger.info(f"地主牌: {[card_to_string(card) for card in self.landlord_cards]}")
-        for i, player in enumerate(self.players):
-            logger.info(f"玩家 {i + 1} ({player.role}) 手牌: {player.get_hand_cards_as_strings()}")
-            logger.info(f"玩家 {i + 1} 出牌历史: {[card_to_string(card) for play in player.history for card in play]}")
-        logger.info(f"当前玩家索引: {self.current_player_index}")
+        # logger.info(f"地主牌: {[card_to_string(card) for card in self.landlord_cards]}")
+        # for i, player in enumerate(self.players):
+            # logger.info(f"玩家 {i + 1} ({player.role}) 手牌: {player.get_hand_cards_as_strings()}")
+            # logger.info(f"玩家 {i + 1} 出牌历史: {[card_to_string(card) for play in player.history for card in play]}")
+        # logger.info(f"当前玩家索引: {self.current_player_index}")
         logger.info(f"游戏是否结束: {self.game_over}")
         return state
 
diff --git a/src/environment/chengdu_mahjong_env.py b/src/environment/chengdu_mahjong_env.py
index a06bbe6..62ec658 100644
--- a/src/environment/chengdu_mahjong_env.py
+++ b/src/environment/chengdu_mahjong_env.py
@@ -60,7 +60,7 @@ class ChengduMahjongEnv(gym.Env):
         # **执行动作**
         if action < max_hand_actions:  # 打牌动作
             tile = hand[action]
-            logger.info(f"玩家 {current_player} 选择打牌: {tile}")
+            # logger.info(f"玩家 {current_player} 选择打牌: {tile}")
             self.engine.check_other_players(tile)
         elif action == max_hand_actions:  # 碰
             tile_to_peng = self._get_tile_for_special_action("peng")
diff --git a/src/environment/dizhu_env.py b/src/environment/dizhu_env.py
index fe1c036..a9acbc2 100644
--- a/src/environment/dizhu_env.py
+++ b/src/environment/dizhu_env.py
@@ -30,16 +30,16 @@ class DouDiZhuEnv(gym.Env):
         try:
             reward = 0  # 初始化奖励
             current_player = self.engine.get_current_player()
-            logger.info(f"当前玩家: 玩家 {self.engine.current_player_index + 1} 手牌: {current_player.get_hand_cards_as_strings()}")
+            # logger.info(f"当前玩家: 玩家 {self.engine.current_player_index + 1} 手牌: {current_player.get_hand_cards_as_strings()}")
 
             if action == 0:  # 过牌
-                logger.info(f"玩家 {self.engine.current_player_index + 1} 选择过牌")
+                # logger.info(f"玩家 {self.engine.current_player_index + 1} 选择过牌")
                 self.engine.step("pass")
                 reward -= 0.5  # 对频繁过牌给予轻微惩罚
             else:
                 # 玩家选择出牌
                 action_cards = self._decode_action(action)  # 解码动作为具体的牌型
-                logger.info(f"玩家 {self.engine.current_player_index + 1} 选择出牌: {action_cards}")
+                # logger.info(f"玩家 {self.engine.current_player_index + 1} 选择出牌: {action_cards}")
 
                 # 出牌前的手牌数量
                 previous_hand_count = len(current_player.hand_cards)
@@ -57,13 +57,13 @@ class DouDiZhuEnv(gym.Env):
             done = self.engine.game_over
             if done:
                 reward += 10  # 胜利时给予较大的奖励
-                logger.info(f"游戏结束！胜利玩家: {self.engine.current_player_index + 1}")
+                # logger.info(f"游戏结束！胜利玩家: {self.engine.current_player_index + 1}")
 
             return self._get_observation(), reward, done, {}
 
         except ValueError as e:
             # 对无效动作设置较大的负奖励
-            logger.error(f"无效动作: {e}")
+            # logger.error(f"无效动作: {e}")
             return self._get_observation(), -5, False, {"error": str(e)}
 
     def _get_observation(self):