parent
0864295a6e
commit
5eef2384cf
|
|
@ -243,11 +243,15 @@ def handle_peng(self, player, tile):
|
||||||
logger.error(f"玩家 {player} 无法碰牌: {tile}")
|
logger.error(f"玩家 {player} 无法碰牌: {tile}")
|
||||||
return False
|
return False
|
||||||
|
|
||||||
# 减少两张牌
|
# 减少手牌中的牌数量
|
||||||
self.state.hands[player].tile_count[tile] -= 2
|
self.state.hands[player].remove_tile(tile) # 移除第一张
|
||||||
self.state.melds[player].append(Meld(tile, "碰")) # 确保使用 MahjongTile 对象
|
self.state.hands[player].remove_tile(tile) # 移除第二张
|
||||||
|
|
||||||
|
# 添加到明牌区
|
||||||
|
self.state.melds[player].append(Meld(tile, "碰")) # 使用 Meld 类表示明牌
|
||||||
|
|
||||||
logger.info(f"玩家 {player} 碰了牌: {tile}。当前明牌: {self.state.melds[player]}")
|
logger.info(f"玩家 {player} 碰了牌: {tile}。当前明牌: {self.state.melds[player]}")
|
||||||
|
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -289,7 +293,7 @@ def handle_gang(self, player, tile, mode):
|
||||||
return False
|
return False
|
||||||
|
|
||||||
# 更新状态
|
# 更新状态
|
||||||
self._update_meld(player, tile, "杠", count=3)
|
self.update_meld(player, tile, "杠", count=3)
|
||||||
|
|
||||||
# 明杠分数计算
|
# 明杠分数计算
|
||||||
gang_score = base_score * 2
|
gang_score = base_score * 2
|
||||||
|
|
@ -481,7 +485,7 @@ def handle_win(self, player, current_player, tile):
|
||||||
logger.info(f"当前分数: {self.state.scores}")
|
logger.info(f"当前分数: {self.state.scores}")
|
||||||
|
|
||||||
|
|
||||||
def _update_meld(self, player, tile, meld_type, count):
|
def update_meld(self, player, tile, meld_type, count):
|
||||||
"""
|
"""
|
||||||
更新玩家的明牌状态,并移除相应的牌。
|
更新玩家的明牌状态,并移除相应的牌。
|
||||||
"""
|
"""
|
||||||
|
|
|
||||||
|
|
@ -141,3 +141,11 @@ class ChengduMahjongEngine:
|
||||||
if not actions_taken:
|
if not actions_taken:
|
||||||
logger.info(f"玩家 {current_player} 打出的牌 {tile} 没有触发其他玩家的操作")
|
logger.info(f"玩家 {current_player} 打出的牌 {tile} 没有触发其他玩家的操作")
|
||||||
return actions_taken
|
return actions_taken
|
||||||
|
|
||||||
|
def update_meld(self, player, tile, meld_type, count):
    """
    Record a new exposed meld for a player and take its tiles out of the hand.

    Args:
        player: Index of the player forming the meld.
        tile: The tile the meld is built from.
        meld_type: Label stored with the meld (handle_gang passes "杠").
        count: Number of copies of ``tile`` to remove from the player's hand.
    """
    hand = self.state.hands[player]

    # Remove the tiles one at a time through remove_tile(), the same way
    # handle_peng does, rather than decrementing tile_count directly.
    # NOTE(review): presumably remove_tile() keeps hand.tiles and
    # hand.tile_count in step with each other -- confirm against the hand class.
    for _ in range(count):
        hand.remove_tile(tile)

    # NOTE(review): handle_peng appends Meld(tile, "碰") objects to this same
    # list while this method appends a (meld_type, tile) tuple. The tuple is
    # kept here so no new name is required in this module; unify the two
    # representations once Meld is confirmed to be importable here.
    self.state.melds[player].append((meld_type, tile))
    logger.info(f"玩家 {player} 更新明牌: {meld_type} {tile},当前明牌: {self.state.melds[player]}")
|
||||||
|
|
@ -13,9 +13,6 @@ class ChengduMahjongEnv(gym.Env):
|
||||||
# 初始化麻将引擎
|
# 初始化麻将引擎
|
||||||
self.engine = ChengduMahjongEngine()
|
self.engine = ChengduMahjongEngine()
|
||||||
|
|
||||||
# 定义动作空间:打牌(0-13)+ 特殊动作(14: 碰, 15: 杠, 16: 胡)
|
|
||||||
self.action_space = spaces.Discrete(14 + 3)
|
|
||||||
|
|
||||||
# 定义观察空间:手牌、明牌、弃牌和庄家信息
|
# 定义观察空间:手牌、明牌、弃牌和庄家信息
|
||||||
self.observation_space = spaces.Dict({
|
self.observation_space = spaces.Dict({
|
||||||
"hand": spaces.Box(low=0, high=4, shape=(108,), dtype=np.int32), # 手牌数量
|
"hand": spaces.Box(low=0, high=4, shape=(108,), dtype=np.int32), # 手牌数量
|
||||||
|
|
@ -27,6 +24,15 @@ class ChengduMahjongEnv(gym.Env):
|
||||||
# 初始化游戏
|
# 初始化游戏
|
||||||
self.reset()
|
self.reset()
|
||||||
|
|
||||||
|
@property
def action_space(self):
    """
    Build the action space for the player whose turn it is.

    Action indices are laid out relative to the current hand size ``n``,
    which is the layout step() validates against:

    - ``0 .. n - 1``: discard the hand tile at that index
    - ``n``: pung (碰)
    - ``n + 1``: kong (杠)
    - ``n + 2``: win (胡)

    The space is therefore ``Discrete(n + 3)``. Sizing it by the number of
    currently legal actions would be wrong, because the legal indices are
    not contiguous when some special action is unavailable: the highest
    legal index could fall outside the space while an illegal one falls
    inside it. Use get_action_space() to obtain the legal subset.

    Returns:
        spaces.Discrete: Space covering every index step() accepts.
    """
    current_player = self.engine.state.current_player
    hand_size = len(self.engine.state.hands[current_player].tiles)
    # Three extra slots for the special actions: pung, kong, win.
    return spaces.Discrete(hand_size + 3)
|
||||||
|
|
||||||
def reset(self):
|
def reset(self):
|
||||||
"""重置游戏状态"""
|
"""重置游戏状态"""
|
||||||
self.engine = ChengduMahjongEngine() # 重置引擎
|
self.engine = ChengduMahjongEngine() # 重置引擎
|
||||||
|
|
@ -42,27 +48,35 @@ class ChengduMahjongEnv(gym.Env):
|
||||||
"""
|
"""
|
||||||
current_player = self.engine.state.current_player
|
current_player = self.engine.state.current_player
|
||||||
hand = self.engine.state.hands[current_player].tiles # 当前玩家手牌
|
hand = self.engine.state.hands[current_player].tiles # 当前玩家手牌
|
||||||
|
logger.info(f"玩家 {current_player} 手牌: {self.engine.state.hands[current_player].tiles}")
|
||||||
|
|
||||||
# **1. 执行动作并检查合法性**
|
# **检查动作合法性**
|
||||||
if action < len(hand): # 打牌动作
|
max_hand_actions = len(hand) # 当前玩家手牌数量
|
||||||
|
max_action_index = max_hand_actions + 3 # 打牌 + 特殊动作
|
||||||
|
|
||||||
|
if action >= max_action_index:
|
||||||
|
raise ValueError(f"无效的动作: {action}")
|
||||||
|
|
||||||
|
# **执行动作**
|
||||||
|
if action < max_hand_actions: # 打牌动作
|
||||||
tile = hand[action]
|
tile = hand[action]
|
||||||
logger.info(f"玩家 {current_player} 选择打牌: {tile}")
|
logger.info(f"玩家 {current_player} 选择打牌: {tile}")
|
||||||
self.engine.check_other_players(tile)
|
self.engine.check_other_players(tile)
|
||||||
elif action == 14: # 碰
|
elif action == max_hand_actions: # 碰
|
||||||
tile_to_peng = self._get_tile_for_special_action("peng")
|
tile_to_peng = self._get_tile_for_special_action("peng")
|
||||||
if tile_to_peng:
|
if tile_to_peng:
|
||||||
handle_peng(self.engine, current_player, tile_to_peng)
|
handle_peng(self.engine, current_player, tile_to_peng)
|
||||||
logger.info(f"玩家 {current_player} 碰了牌: {tile_to_peng}")
|
logger.info(f"玩家 {current_player} 碰了牌: {tile_to_peng}")
|
||||||
else:
|
else:
|
||||||
logger.warning("碰动作无效,未满足条件")
|
logger.warning("碰动作无效,未满足条件")
|
||||||
elif action == 15: # 杠
|
elif action == max_hand_actions + 1: # 杠
|
||||||
tile_to_gang = self._get_tile_for_special_action("gang")
|
tile_to_gang = self._get_tile_for_special_action("gang")
|
||||||
if tile_to_gang:
|
if tile_to_gang:
|
||||||
handle_gang(self.engine, current_player, tile_to_gang, mode="an")
|
handle_gang(self.engine, current_player, tile_to_gang, mode="an")
|
||||||
logger.info(f"玩家 {current_player} 杠了牌: {tile_to_gang}")
|
logger.info(f"玩家 {current_player} 杠了牌: {tile_to_gang}")
|
||||||
else:
|
else:
|
||||||
logger.warning("杠动作无效,未满足条件")
|
logger.warning("杠动作无效,未满足条件")
|
||||||
elif action == 16: # 胡
|
elif action == max_hand_actions + 2: # 胡
|
||||||
if self.engine.state.can_win(
|
if self.engine.state.can_win(
|
||||||
self.engine.state.hands[current_player],
|
self.engine.state.hands[current_player],
|
||||||
self.engine.state.melds[current_player],
|
self.engine.state.melds[current_player],
|
||||||
|
|
@ -72,28 +86,28 @@ class ChengduMahjongEnv(gym.Env):
|
||||||
logger.info(f"玩家 {current_player} 胡牌!")
|
logger.info(f"玩家 {current_player} 胡牌!")
|
||||||
else:
|
else:
|
||||||
logger.warning("胡动作无效,未满足条件")
|
logger.warning("胡动作无效,未满足条件")
|
||||||
else:
|
|
||||||
raise ValueError(f"无效的动作: {action}")
|
|
||||||
|
|
||||||
# **2. 更新状态**
|
# **更新玩家轮次**
|
||||||
|
if not self.engine.game_over: # 确保游戏未结束时才轮转玩家
|
||||||
|
self.engine.state.current_player = (current_player + 1) % 4
|
||||||
|
|
||||||
|
# **更新状态**
|
||||||
obs = self._get_observation()
|
obs = self._get_observation()
|
||||||
|
|
||||||
# **3. 奖励设计**
|
# **奖励设计**
|
||||||
reward = self._calculate_reward(current_player)
|
reward = self._calculate_reward(current_player)
|
||||||
|
|
||||||
# **4. 检查游戏是否结束**
|
# **检查游戏是否结束**
|
||||||
self.engine.check_game_over()
|
self.engine.check_game_over()
|
||||||
done = self.engine.game_over
|
done = self.engine.game_over
|
||||||
|
|
||||||
# **5. 返回值**
|
# **返回值**
|
||||||
info = {
|
info = {
|
||||||
"player": current_player,
|
"player": current_player,
|
||||||
"action": action,
|
"action": action,
|
||||||
}
|
}
|
||||||
return obs, reward, done, info
|
return obs, reward, done, info
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def _get_observation(self):
|
def _get_observation(self):
|
||||||
"""
|
"""
|
||||||
提取当前玩家的观察空间
|
提取当前玩家的观察空间
|
||||||
|
|
@ -148,3 +162,59 @@ class ChengduMahjongEnv(gym.Env):
|
||||||
):
|
):
|
||||||
return True
|
return True
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
def get_action_space(self):
    """
    Compute the list of actions the current player may take right now.

    With ``n`` being the number of tiles in the current player's hand, the
    returned indices mean:

    - ``0 .. n - 1``: discard the hand tile at that index
    - ``n``: pung (碰), present only when _can_peng() allows it
    - ``n + 1``: kong (杠), present only when _can_gang() allows it
    - ``n + 2``: win (胡), present only when _can_hu() allows it

    Returns:
        list[int]: Legal action indices in ascending order.
    """
    seat = self.engine.state.current_player
    discard_count = len(self.engine.state.hands[seat].tiles)

    # Every tile in hand can be discarded.
    legal = list(range(discard_count))

    # Special actions are probed in a fixed order (pung, kong, win); the
    # position in this tuple is the offset past the discard indices.
    special_checks = (self._can_peng, self._can_gang, self._can_hu)
    for offset, is_allowed in enumerate(special_checks):
        if is_allowed(seat):
            legal.append(discard_count + offset)

    return legal
|
||||||
|
|
||||||
|
|
||||||
|
# 辅助函数判断特殊动作是否可执行
|
||||||
|
def _can_peng(self, player):
    """
    Report whether the player's hand contains a tile usable for a pung (碰).

    Only the player's own hand is inspected: the answer is True as soon as
    any tile appears at least twice in ``tile_count``.

    Args:
        player: Index of the player to check.

    Returns:
        bool: True if some tile has a count of 2 or more, else False.
    """
    counts = self.engine.state.hands[player].tile_count.values()
    # Only the counts matter here, so iterate values rather than items.
    return any(count >= 2 for count in counts)
|
||||||
|
|
||||||
|
def _can_gang(self, player):
    """
    Report whether the player's hand contains a tile usable for a kong (杠).

    Only the player's own hand is inspected: the answer is True when some
    tile appears exactly four times in ``tile_count``.

    Args:
        player: Index of the player to check.

    Returns:
        bool: True if some tile has a count of exactly 4, else False.
    """
    counts = self.engine.state.hands[player].tile_count.values()
    # Only the counts matter here, so iterate values rather than items.
    return any(count == 4 for count in counts)
|
||||||
|
|
||||||
|
def _can_hu(self, player):
    """
    Ask the game state whether the player can declare a win (胡).

    Delegates to ``state.can_win`` with the player's hand, exposed melds
    and missing suit, and returns its result unchanged.

    Args:
        player: Index of the player to check.
    """
    state = self.engine.state
    player_hand = state.hands[player]
    player_melds = state.melds[player]
    player_missing_suit = state.missing_suits[player]
    return state.can_win(player_hand, player_melds, player_missing_suit)
|
||||||
Loading…
Reference in New Issue