From f1836172d6ce506488481de5e2309b2ad603befc Mon Sep 17 00:00:00 2001 From: wsy182 <2392948297@qq.com> Date: Mon, 2 Dec 2024 00:20:51 +0800 Subject: [PATCH] 1 1 --- scripts/train_dizhu_model.py | 2 +- src/engine/dizhu/dizhu_engine.py | 24 ++++++++++++------------ src/environment/chengdu_mahjong_env.py | 2 +- src/environment/dizhu_env.py | 10 +++++----- 4 files changed, 19 insertions(+), 19 deletions(-) diff --git a/scripts/train_dizhu_model.py b/scripts/train_dizhu_model.py index 5cefb69..1579483 100644 --- a/scripts/train_dizhu_model.py +++ b/scripts/train_dizhu_model.py @@ -23,7 +23,7 @@ def train_dizhu_model(): # 训练模型,设定总训练步数 logger.info("开始训练斗地主模型...") - model.learn(total_timesteps=100000) # 总训练步数 + model.learn(total_timesteps=10000000000000000) # 总训练步数 logger.info("斗地主模型训练完成!") # 保存训练后的模型 diff --git a/src/engine/dizhu/dizhu_engine.py b/src/engine/dizhu/dizhu_engine.py index 9024169..8bfd1a6 100644 --- a/src/engine/dizhu/dizhu_engine.py +++ b/src/engine/dizhu/dizhu_engine.py @@ -43,8 +43,8 @@ class DiZhuEngine: def get_current_player(self): """获取当前玩家对象""" current_player = self.players[self.current_player_index] - logger.info(f"当前玩家: 玩家 {self.current_player_index + 1} ({current_player.role})") - logger.info(f"当前玩家手牌: {current_player.get_hand_cards_as_strings()}") + # logger.info(f"当前玩家: 玩家 {self.current_player_index + 1} ({current_player.role})") + # logger.info(f"当前玩家手牌: {current_player.get_hand_cards_as_strings()}") return current_player def step(self, action): @@ -55,7 +55,7 @@ class DiZhuEngine: current_player = self.get_current_player() if action == "pass": - logger.info(f"玩家 {self.current_player_index + 1} 选择过牌") + # logger.info(f"玩家 {self.current_player_index + 1} 选择过牌") self.pass_count += 1 # 如果所有其他玩家都过牌,允许最后出牌玩家再次出牌 @@ -63,18 +63,18 @@ class DiZhuEngine: self.current_player_index = self.last_player self.pass_count = 0 # 重置过牌计数 self.current_pile = None # 清空当前牌面 - logger.info(f"所有玩家过牌,玩家 {self.current_player_index + 1} 可以继续出牌") + # logger.info(f"所有玩家过牌,玩家 {self.current_player_index + 1} 可以继续出牌") else: # 出牌逻辑 if not isinstance(action, list): action = [action] if not all(card in current_player.hand_cards for card in action): - logger.error(f"玩家 {self.current_player_index + 1} 的动作非法: {action}") + # logger.error(f"玩家 {self.current_player_index + 1} 的动作非法: {action}") raise ValueError(f"玩家手牌不足以完成此次出牌: {action}") if self.current_pile and not self._can_beat(self.current_pile, action): - logger.error(f"玩家 {self.current_player_index + 1} 出牌非法: {action}") + # logger.error(f"玩家 {self.current_player_index + 1} 出牌非法: {action}") raise ValueError(f"出牌无法打过当前牌面: {action}") # 出牌成功 @@ -82,7 +82,7 @@ class DiZhuEngine: self.pass_count = 0 # 出牌后重置过牌计数 self.last_player = self.current_player_index # 更新最后出牌的玩家 - logger.info(f"玩家 {self.current_player_index + 1} 出牌: {[card_to_string(card) for card in action]}") + # logger.info(f"玩家 {self.current_player_index + 1} 出牌: {[card_to_string(card) for card in action]}") # 从手牌中移除 for card in action: @@ -172,11 +172,11 @@ class DiZhuEngine: "game_over": self.game_over, } logger.info("当前游戏状态: ") - logger.info(f"地主牌: {[card_to_string(card) for card in self.landlord_cards]}") - for i, player in enumerate(self.players): - logger.info(f"玩家 {i + 1} ({player.role}) 手牌: {player.get_hand_cards_as_strings()}") - logger.info(f"玩家 {i + 1} 出牌历史: {[card_to_string(card) for play in player.history for card in play]}") - logger.info(f"当前玩家索引: {self.current_player_index}") + # logger.info(f"地主牌: {[card_to_string(card) for card in self.landlord_cards]}") + # for i, player in enumerate(self.players): + # logger.info(f"玩家 {i + 1} ({player.role}) 手牌: {player.get_hand_cards_as_strings()}") + # logger.info(f"玩家 {i + 1} 出牌历史: {[card_to_string(card) for play in player.history for card in play]}") + # logger.info(f"当前玩家索引: {self.current_player_index}") logger.info(f"游戏是否结束: {self.game_over}") return state diff --git a/src/environment/chengdu_mahjong_env.py b/src/environment/chengdu_mahjong_env.py index a06bbe6..62ec658 100644 --- a/src/environment/chengdu_mahjong_env.py +++ b/src/environment/chengdu_mahjong_env.py @@ -60,7 +60,7 @@ class ChengduMahjongEnv(gym.Env): # **执行动作** if action < max_hand_actions: # 打牌动作 tile = hand[action] - logger.info(f"玩家 {current_player} 选择打牌: {tile}") + # logger.info(f"玩家 {current_player} 选择打牌: {tile}") self.engine.check_other_players(tile) elif action == max_hand_actions: # 碰 tile_to_peng = self._get_tile_for_special_action("peng") diff --git a/src/environment/dizhu_env.py b/src/environment/dizhu_env.py index fe1c036..a9acbc2 100644 --- a/src/environment/dizhu_env.py +++ b/src/environment/dizhu_env.py @@ -30,16 +30,16 @@ class DouDiZhuEnv(gym.Env): try: reward = 0 # 初始化奖励 current_player = self.engine.get_current_player() - logger.info(f"当前玩家: 玩家 {self.engine.current_player_index + 1} 手牌: {current_player.get_hand_cards_as_strings()}") + # logger.info(f"当前玩家: 玩家 {self.engine.current_player_index + 1} 手牌: {current_player.get_hand_cards_as_strings()}") if action == 0: # 过牌 - logger.info(f"玩家 {self.engine.current_player_index + 1} 选择过牌") + # logger.info(f"玩家 {self.engine.current_player_index + 1} 选择过牌") self.engine.step("pass") reward -= 0.5 # 对频繁过牌给予轻微惩罚 else: # 玩家选择出牌 action_cards = self._decode_action(action) # 解码动作为具体的牌型 - logger.info(f"玩家 {self.engine.current_player_index + 1} 选择出牌: {action_cards}") + # logger.info(f"玩家 {self.engine.current_player_index + 1} 选择出牌: {action_cards}") # 出牌前的手牌数量 previous_hand_count = len(current_player.hand_cards) @@ -57,13 +57,13 @@ class DouDiZhuEnv(gym.Env): done = self.engine.game_over if done: reward += 10 # 胜利时给予较大的奖励 - logger.info(f"游戏结束!胜利玩家: {self.engine.current_player_index + 1}") + # logger.info(f"游戏结束!胜利玩家: {self.engine.current_player_index + 1}") return self._get_observation(), reward, done, {} except ValueError as e: # 对无效动作设置较大的负奖励 - logger.error(f"无效动作: {e}") + # logger.error(f"无效动作: {e}") return self._get_observation(), -5, False, {"error": str(e)} def _get_observation(self):