parent
adeb153c8a
commit
f1836172d6
|
|
@ -23,7 +23,7 @@ def train_dizhu_model():
|
||||||
|
|
||||||
# 训练模型,设定总训练步数
|
# 训练模型,设定总训练步数
|
||||||
logger.info("开始训练斗地主模型...")
|
logger.info("开始训练斗地主模型...")
|
||||||
model.learn(total_timesteps=100000) # 总训练步数
|
model.learn(total_timesteps=10000000000000000) # 总训练步数
|
||||||
logger.info("斗地主模型训练完成!")
|
logger.info("斗地主模型训练完成!")
|
||||||
|
|
||||||
# 保存训练后的模型
|
# 保存训练后的模型
|
||||||
|
|
|
||||||
|
|
@ -43,8 +43,8 @@ class DiZhuEngine:
|
||||||
def get_current_player(self):
|
def get_current_player(self):
|
||||||
"""获取当前玩家对象"""
|
"""获取当前玩家对象"""
|
||||||
current_player = self.players[self.current_player_index]
|
current_player = self.players[self.current_player_index]
|
||||||
logger.info(f"当前玩家: 玩家 {self.current_player_index + 1} ({current_player.role})")
|
# logger.info(f"当前玩家: 玩家 {self.current_player_index + 1} ({current_player.role})")
|
||||||
logger.info(f"当前玩家手牌: {current_player.get_hand_cards_as_strings()}")
|
# logger.info(f"当前玩家手牌: {current_player.get_hand_cards_as_strings()}")
|
||||||
return current_player
|
return current_player
|
||||||
|
|
||||||
def step(self, action):
|
def step(self, action):
|
||||||
|
|
@ -55,7 +55,7 @@ class DiZhuEngine:
|
||||||
current_player = self.get_current_player()
|
current_player = self.get_current_player()
|
||||||
|
|
||||||
if action == "pass":
|
if action == "pass":
|
||||||
logger.info(f"玩家 {self.current_player_index + 1} 选择过牌")
|
# logger.info(f"玩家 {self.current_player_index + 1} 选择过牌")
|
||||||
self.pass_count += 1
|
self.pass_count += 1
|
||||||
|
|
||||||
# 如果所有其他玩家都过牌,允许最后出牌玩家再次出牌
|
# 如果所有其他玩家都过牌,允许最后出牌玩家再次出牌
|
||||||
|
|
@ -63,18 +63,18 @@ class DiZhuEngine:
|
||||||
self.current_player_index = self.last_player
|
self.current_player_index = self.last_player
|
||||||
self.pass_count = 0 # 重置过牌计数
|
self.pass_count = 0 # 重置过牌计数
|
||||||
self.current_pile = None # 清空当前牌面
|
self.current_pile = None # 清空当前牌面
|
||||||
logger.info(f"所有玩家过牌,玩家 {self.current_player_index + 1} 可以继续出牌")
|
# logger.info(f"所有玩家过牌,玩家 {self.current_player_index + 1} 可以继续出牌")
|
||||||
else:
|
else:
|
||||||
# 出牌逻辑
|
# 出牌逻辑
|
||||||
if not isinstance(action, list):
|
if not isinstance(action, list):
|
||||||
action = [action]
|
action = [action]
|
||||||
|
|
||||||
if not all(card in current_player.hand_cards for card in action):
|
if not all(card in current_player.hand_cards for card in action):
|
||||||
logger.error(f"玩家 {self.current_player_index + 1} 的动作非法: {action}")
|
# logger.error(f"玩家 {self.current_player_index + 1} 的动作非法: {action}")
|
||||||
raise ValueError(f"玩家手牌不足以完成此次出牌: {action}")
|
raise ValueError(f"玩家手牌不足以完成此次出牌: {action}")
|
||||||
|
|
||||||
if self.current_pile and not self._can_beat(self.current_pile, action):
|
if self.current_pile and not self._can_beat(self.current_pile, action):
|
||||||
logger.error(f"玩家 {self.current_player_index + 1} 出牌非法: {action}")
|
# logger.error(f"玩家 {self.current_player_index + 1} 出牌非法: {action}")
|
||||||
raise ValueError(f"出牌无法打过当前牌面: {action}")
|
raise ValueError(f"出牌无法打过当前牌面: {action}")
|
||||||
|
|
||||||
# 出牌成功
|
# 出牌成功
|
||||||
|
|
@ -82,7 +82,7 @@ class DiZhuEngine:
|
||||||
self.pass_count = 0 # 出牌后重置过牌计数
|
self.pass_count = 0 # 出牌后重置过牌计数
|
||||||
self.last_player = self.current_player_index # 更新最后出牌的玩家
|
self.last_player = self.current_player_index # 更新最后出牌的玩家
|
||||||
|
|
||||||
logger.info(f"玩家 {self.current_player_index + 1} 出牌: {[card_to_string(card) for card in action]}")
|
# logger.info(f"玩家 {self.current_player_index + 1} 出牌: {[card_to_string(card) for card in action]}")
|
||||||
|
|
||||||
# 从手牌中移除
|
# 从手牌中移除
|
||||||
for card in action:
|
for card in action:
|
||||||
|
|
@ -172,11 +172,11 @@ class DiZhuEngine:
|
||||||
"game_over": self.game_over,
|
"game_over": self.game_over,
|
||||||
}
|
}
|
||||||
logger.info("当前游戏状态: ")
|
logger.info("当前游戏状态: ")
|
||||||
logger.info(f"地主牌: {[card_to_string(card) for card in self.landlord_cards]}")
|
# logger.info(f"地主牌: {[card_to_string(card) for card in self.landlord_cards]}")
|
||||||
for i, player in enumerate(self.players):
|
# for i, player in enumerate(self.players):
|
||||||
logger.info(f"玩家 {i + 1} ({player.role}) 手牌: {player.get_hand_cards_as_strings()}")
|
# logger.info(f"玩家 {i + 1} ({player.role}) 手牌: {player.get_hand_cards_as_strings()}")
|
||||||
logger.info(f"玩家 {i + 1} 出牌历史: {[card_to_string(card) for play in player.history for card in play]}")
|
# logger.info(f"玩家 {i + 1} 出牌历史: {[card_to_string(card) for play in player.history for card in play]}")
|
||||||
logger.info(f"当前玩家索引: {self.current_player_index}")
|
# logger.info(f"当前玩家索引: {self.current_player_index}")
|
||||||
logger.info(f"游戏是否结束: {self.game_over}")
|
logger.info(f"游戏是否结束: {self.game_over}")
|
||||||
return state
|
return state
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -60,7 +60,7 @@ class ChengduMahjongEnv(gym.Env):
|
||||||
# **执行动作**
|
# **执行动作**
|
||||||
if action < max_hand_actions: # 打牌动作
|
if action < max_hand_actions: # 打牌动作
|
||||||
tile = hand[action]
|
tile = hand[action]
|
||||||
logger.info(f"玩家 {current_player} 选择打牌: {tile}")
|
# logger.info(f"玩家 {current_player} 选择打牌: {tile}")
|
||||||
self.engine.check_other_players(tile)
|
self.engine.check_other_players(tile)
|
||||||
elif action == max_hand_actions: # 碰
|
elif action == max_hand_actions: # 碰
|
||||||
tile_to_peng = self._get_tile_for_special_action("peng")
|
tile_to_peng = self._get_tile_for_special_action("peng")
|
||||||
|
|
|
||||||
|
|
@ -30,16 +30,16 @@ class DouDiZhuEnv(gym.Env):
|
||||||
try:
|
try:
|
||||||
reward = 0 # 初始化奖励
|
reward = 0 # 初始化奖励
|
||||||
current_player = self.engine.get_current_player()
|
current_player = self.engine.get_current_player()
|
||||||
logger.info(f"当前玩家: 玩家 {self.engine.current_player_index + 1} 手牌: {current_player.get_hand_cards_as_strings()}")
|
# logger.info(f"当前玩家: 玩家 {self.engine.current_player_index + 1} 手牌: {current_player.get_hand_cards_as_strings()}")
|
||||||
|
|
||||||
if action == 0: # 过牌
|
if action == 0: # 过牌
|
||||||
logger.info(f"玩家 {self.engine.current_player_index + 1} 选择过牌")
|
# logger.info(f"玩家 {self.engine.current_player_index + 1} 选择过牌")
|
||||||
self.engine.step("pass")
|
self.engine.step("pass")
|
||||||
reward -= 0.5 # 对频繁过牌给予轻微惩罚
|
reward -= 0.5 # 对频繁过牌给予轻微惩罚
|
||||||
else:
|
else:
|
||||||
# 玩家选择出牌
|
# 玩家选择出牌
|
||||||
action_cards = self._decode_action(action) # 解码动作为具体的牌型
|
action_cards = self._decode_action(action) # 解码动作为具体的牌型
|
||||||
logger.info(f"玩家 {self.engine.current_player_index + 1} 选择出牌: {action_cards}")
|
# logger.info(f"玩家 {self.engine.current_player_index + 1} 选择出牌: {action_cards}")
|
||||||
|
|
||||||
# 出牌前的手牌数量
|
# 出牌前的手牌数量
|
||||||
previous_hand_count = len(current_player.hand_cards)
|
previous_hand_count = len(current_player.hand_cards)
|
||||||
|
|
@ -57,13 +57,13 @@ class DouDiZhuEnv(gym.Env):
|
||||||
done = self.engine.game_over
|
done = self.engine.game_over
|
||||||
if done:
|
if done:
|
||||||
reward += 10 # 胜利时给予较大的奖励
|
reward += 10 # 胜利时给予较大的奖励
|
||||||
logger.info(f"游戏结束!胜利玩家: {self.engine.current_player_index + 1}")
|
# logger.info(f"游戏结束!胜利玩家: {self.engine.current_player_index + 1}")
|
||||||
|
|
||||||
return self._get_observation(), reward, done, {}
|
return self._get_observation(), reward, done, {}
|
||||||
|
|
||||||
except ValueError as e:
|
except ValueError as e:
|
||||||
# 对无效动作设置较大的负奖励
|
# 对无效动作设置较大的负奖励
|
||||||
logger.error(f"无效动作: {e}")
|
# logger.error(f"无效动作: {e}")
|
||||||
return self._get_observation(), -5, False, {"error": str(e)}
|
return self._get_observation(), -5, False, {"error": str(e)}
|
||||||
|
|
||||||
def _get_observation(self):
|
def _get_observation(self):
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue