wsy182 2024-12-01 20:26:09 +08:00
parent 0864295a6e
commit 5eef2384cf
3 changed files with 104 additions and 22 deletions

View File

@ -243,11 +243,15 @@ def handle_peng(self, player, tile):
logger.error(f"玩家 {player} 无法碰牌: {tile}")
return False
# 减少两张牌
self.state.hands[player].tile_count[tile] -= 2
self.state.melds[player].append(Meld(tile, "")) # 确保使用 MahjongTile 对象
# 减少手牌中的牌数量
self.state.hands[player].remove_tile(tile) # 移除第一张
self.state.hands[player].remove_tile(tile) # 移除第二张
# 添加到明牌区
self.state.melds[player].append(Meld(tile, "")) # 使用 Meld 类表示明牌
logger.info(f"玩家 {player} 碰了牌: {tile}。当前明牌: {self.state.melds[player]}")
return True
@ -289,7 +293,7 @@ def handle_gang(self, player, tile, mode):
return False
# 更新状态
self._update_meld(player, tile, "", count=3)
self.update_meld(player, tile, "", count=3)
# 明杠分数计算
gang_score = base_score * 2
@ -481,7 +485,7 @@ def handle_win(self, player, current_player, tile):
logger.info(f"当前分数: {self.state.scores}")
def _update_meld(self, player, tile, meld_type, count):
def update_meld(self, player, tile, meld_type, count):
"""
更新玩家的明牌状态并移除相应的牌
"""

View File

@ -140,4 +140,12 @@ class ChengduMahjongEngine:
if not actions_taken:
logger.info(f"玩家 {current_player} 打出的牌 {tile} 没有触发其他玩家的操作")
return actions_taken
return actions_taken
def update_meld(self, player, tile, meld_type, count):
    """
    Move `count` copies of `tile` out of a player's hand and record the meld.

    Args:
        player: Index of the player whose hand and melds are updated.
        tile: The tile being melded.
        meld_type: Label of the meld; stored unchanged next to the tile.
        count: Number of copies of `tile` to take out of the hand.
    """
    hand = self.state.hands[player]
    # Remove through the hand's own API, the same way the peng handler does,
    # rather than decrementing tile_count directly: the discard indices used
    # by the environment are taken from hand.tiles, so the tile list must
    # shrink together with the counts.
    # NOTE(review): presumably remove_tile updates both `tiles` and
    # `tile_count` -- confirm against the hand class.
    for _ in range(count):
        hand.remove_tile(tile)
    # NOTE(review): the peng handler appends Meld objects while this appends
    # a (meld_type, tile) tuple -- confirm which shape consumers expect.
    self.state.melds[player].append((meld_type, tile))
    logger.info(f"玩家 {player} 更新明牌: {meld_type} {tile},当前明牌: {self.state.melds[player]}")

View File

@ -13,9 +13,6 @@ class ChengduMahjongEnv(gym.Env):
# 初始化麻将引擎
self.engine = ChengduMahjongEngine()
# 定义动作空间打牌0-13+ 特殊动作14: 碰, 15: 杠, 16: 胡)
self.action_space = spaces.Discrete(14 + 3)
# 定义观察空间:手牌、明牌、弃牌和庄家信息
self.observation_space = spaces.Dict({
"hand": spaces.Box(low=0, high=4, shape=(108,), dtype=np.int32), # 手牌数量
@ -27,6 +24,15 @@ class ChengduMahjongEnv(gym.Env):
# 初始化游戏
self.reset()
@property
def action_space(self):
    """
    Discrete action space for the player whose turn it is.

    Action ids follow the scheme used by step() and get_action_space():
    0 .. len(hand) - 1 discard the hand tile at that index, and
    len(hand), len(hand) + 1, len(hand) + 2 are peng, gang and hu.

    Returns:
        spaces.Discrete with len(hand) + 3 actions.
    """
    state = self.engine.state
    hand_size = len(state.hands[state.current_player].tiles)
    # Size the space by the full id range rather than by the number of legal
    # actions: the legal ids from get_action_space() are not contiguous
    # (e.g. gang may be legal while peng is not), so Discrete(len(legal))
    # could sample an illegal id and never reach a legal one, and an empty
    # legal list would request a zero-sized space. Legality is still
    # reported separately by get_action_space().
    return spaces.Discrete(hand_size + 3)
def reset(self):
"""重置游戏状态"""
self.engine = ChengduMahjongEngine() # 重置引擎
@ -42,27 +48,35 @@ class ChengduMahjongEnv(gym.Env):
"""
current_player = self.engine.state.current_player
hand = self.engine.state.hands[current_player].tiles # 当前玩家手牌
logger.info(f"玩家 {current_player} 手牌: {self.engine.state.hands[current_player].tiles}")
# **1. 执行动作并检查合法性**
if action < len(hand): # 打牌动作
# **检查动作合法性**
max_hand_actions = len(hand) # 当前玩家手牌数量
max_action_index = max_hand_actions + 3 # 打牌 + 特殊动作
if action >= max_action_index:
raise ValueError(f"无效的动作: {action}")
# **执行动作**
if action < max_hand_actions: # 打牌动作
tile = hand[action]
logger.info(f"玩家 {current_player} 选择打牌: {tile}")
self.engine.check_other_players(tile)
elif action == 14: # 碰
elif action == max_hand_actions: # 碰
tile_to_peng = self._get_tile_for_special_action("peng")
if tile_to_peng:
handle_peng(self.engine, current_player, tile_to_peng)
logger.info(f"玩家 {current_player} 碰了牌: {tile_to_peng}")
else:
logger.warning("碰动作无效,未满足条件")
elif action == 15: # 杠
elif action == max_hand_actions + 1: # 杠
tile_to_gang = self._get_tile_for_special_action("gang")
if tile_to_gang:
handle_gang(self.engine, current_player, tile_to_gang, mode="an")
logger.info(f"玩家 {current_player} 杠了牌: {tile_to_gang}")
else:
logger.warning("杠动作无效,未满足条件")
elif action == 16: # 胡
elif action == max_hand_actions + 2: # 胡
if self.engine.state.can_win(
self.engine.state.hands[current_player],
self.engine.state.melds[current_player],
@ -72,28 +86,28 @@ class ChengduMahjongEnv(gym.Env):
logger.info(f"玩家 {current_player} 胡牌!")
else:
logger.warning("胡动作无效,未满足条件")
else:
raise ValueError(f"无效的动作: {action}")
# **2. 更新状态**
# **更新玩家轮次**
if not self.engine.game_over: # 确保游戏未结束时才轮转玩家
self.engine.state.current_player = (current_player + 1) % 4
# **更新状态**
obs = self._get_observation()
# **3. 奖励设计**
# **奖励设计**
reward = self._calculate_reward(current_player)
# **4. 检查游戏是否结束**
# **检查游戏是否结束**
self.engine.check_game_over()
done = self.engine.game_over
# **5. 返回值**
# **返回值**
info = {
"player": current_player,
"action": action,
}
return obs, reward, done, info
def _get_observation(self):
"""
提取当前玩家的观察空间
@ -148,3 +162,59 @@ class ChengduMahjongEnv(gym.Env):
):
return True
return None
def get_action_space(self):
    """
    Compute the action ids that are currently legal for the player to move.

    Returns a list in which:
    - 0 .. len(hand.tiles) - 1 are indices of hand tiles to discard
    - len(hand.tiles) is the peng action
    - len(hand.tiles) + 1 is the gang action
    - len(hand.tiles) + 2 is the hu action
    """
    seat = self.engine.state.current_player
    hand_size = len(self.engine.state.hands[seat].tiles)

    # Every tile currently in hand may be discarded.
    legal = list(range(hand_size))

    # Special actions come after the discard ids, in peng, gang, hu order,
    # and are only listed when the matching predicate allows them.
    special_checks = (
        (self._can_peng, hand_size),
        (self._can_gang, hand_size + 1),
        (self._can_hu, hand_size + 2),
    )
    for is_available, action_id in special_checks:
        if is_available(seat):
            legal.append(action_id)
    return legal
# Helper functions: decide whether each special action is currently available
def _can_peng(self, player):
"""
判断玩家是否可以碰
"""
for tile, count in self.engine.state.hands[player].tile_count.items():
if count >= 2: # 至少两张相同的牌
return True
return False
def _can_gang(self, player):
"""
判断玩家是否可以杠
"""
for tile, count in self.engine.state.hands[player].tile_count.items():
if count == 4: # 有四张相同的牌
return True
return False
def _can_hu(self, player):
"""
判断玩家是否可以胡牌
"""
return self.engine.state.can_win(
self.engine.state.hands[player],
self.engine.state.melds[player],
self.engine.state.missing_suits[player]
)