parent
3ff448b15d
commit
bf1c5116be
|
|
@ -1,6 +1,6 @@
|
||||||
import gym
|
import gym
|
||||||
from stable_baselines3 import PPO
|
from stable_baselines3 import PPO
|
||||||
from src.environment.chengdu_majiang_env import MahjongEnv
|
from src.environment.chengdu_mahjong_env import MahjongEnv
|
||||||
import torch
|
import torch
|
||||||
from configs.log_config import setup_logging
|
from configs.log_config import setup_logging
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -3,7 +3,7 @@ import random
|
||||||
from loguru import logger
|
from loguru import logger
|
||||||
|
|
||||||
from configs.log_config import setup_logging
|
from configs.log_config import setup_logging
|
||||||
from src.engine.actions import draw_tile, check_blood_battle, should_gang, random_choice
|
from src.engine.actions import draw_tile, should_gang, random_choice, handle_win, handle_gang, handle_peng
|
||||||
from src.engine.actions import set_missing_suit, check_other_players
|
from src.engine.actions import set_missing_suit, check_other_players
|
||||||
from src.engine.chengdu_mahjong_state import ChengduMahjongState
|
from src.engine.chengdu_mahjong_state import ChengduMahjongState
|
||||||
|
|
||||||
|
|
@ -133,3 +133,41 @@ class ChengduMahjongEngine:
|
||||||
self.play_turn()
|
self.play_turn()
|
||||||
|
|
||||||
logger.info("游戏已结束")
|
logger.info("游戏已结束")
|
||||||
|
|
||||||
|
def check_other_players(self, tile):
    """
    Let the other three players react to a tile discarded by the current player.

    Claims are resolved by priority across ALL opponents: a win is looked for
    first, then a kong (gang), then a pong (peng). Each priority level is
    checked for every opponent before the next level is considered, so a
    pong or kong by a lower-numbered player can never pre-empt a win by a
    higher-numbered one. At most one claim is executed.

    :param tile: the tile just discarded by ``self.state.current_player``
    :return: True if some player acted on the tile, False otherwise
    """
    current_player = self.state.current_player
    opponents = [player for player in range(4) if player != current_player]
    actions_taken = False

    # Priority 1: win. The first opponent (in seat order) able to win takes the tile.
    for player in opponents:
        # NOTE(review): can_win receives the hand as it is; the discarded tile is
        # not passed in here — confirm the hand already accounts for it.
        if self.state.can_win(self.state.hands[player], self.state.melds[player], self.state.missing_suits[player]):
            logger.info(f"玩家 {player} 可以胡玩家 {current_player} 的牌: {tile}")
            # NOTE(review): unlike handle_gang/handle_peng below, the engine is not
            # passed to handle_win — confirm against its signature.
            handle_win(player, current_player, tile)
            actions_taken = True
            break  # stop after the first win

    # Priority 2: exposed kong, only when nobody won.
    if not actions_taken:
        for player in opponents:
            if self.state.hands[player].tile_count[tile] >= 3:
                logger.info(f"玩家 {player} 可以杠玩家 {current_player} 的牌: {tile}")
                if handle_gang(self, player, tile, mode="ming"):
                    actions_taken = True
                    break  # a successful kong ends the search

    # Priority 3: pong, only when nobody won or konged. A player who declined
    # the kong above is still offered the pong here.
    if not actions_taken:
        for player in opponents:
            if self.state.hands[player].tile_count[tile] >= 2:
                logger.info(f"玩家 {player} 可以碰玩家 {current_player} 的牌: {tile}")
                if handle_peng(self, player, tile):
                    actions_taken = True
                    break  # a successful pong ends the search

    if not actions_taken:
        logger.info(f"玩家 {current_player} 打出的牌 {tile} 没有触发其他玩家的操作")
    return actions_taken
|
||||||
|
|
@ -0,0 +1,144 @@
|
||||||
|
import gym
|
||||||
|
from gym import spaces
|
||||||
|
import numpy as np
|
||||||
|
from src.engine.chengdu_mahjong_engine import ChengduMahjongEngine
|
||||||
|
from loguru import logger
|
||||||
|
|
||||||
|
|
||||||
|
class ChengduMahjongEnv(gym.Env):
    """
    Gym environment that wraps ``ChengduMahjongEngine``.

    The action space is ``Discrete(17)``: actions 0-13 discard the hand tile at
    that position, 14 is pong (peng), 15 is kong (gang) and 16 declares a win.
    Observations are dictionaries of length-108 count vectors for the current
    player plus a ``dealer`` field.
    """

    # Actions below this value select a hand position to discard.
    _NUM_DISCARD_ACTIONS = 14
    _ACTION_PENG = 14
    _ACTION_GANG = 15
    _ACTION_WIN = 16
    # Subtracted from the player's score to form the reward.
    # presumably the starting score of each player — TODO confirm against the state class
    _REWARD_BASELINE = 100

    def __init__(self):
        """Define the action/observation spaces and start a first game."""
        super().__init__()

        # Discard actions (0-13) plus three special actions (14: peng, 15: gang, 16: win).
        self.action_space = spaces.Discrete(self._NUM_DISCARD_ACTIONS + 3)

        # Hand, melds and discards as count vectors, plus the dealer field.
        self.observation_space = spaces.Dict({
            "hand": spaces.Box(low=0, high=4, shape=(108,), dtype=np.int32),
            "melds": spaces.Box(low=0, high=4, shape=(108,), dtype=np.int32),
            "discard_pile": spaces.Box(low=0, high=4, shape=(108,), dtype=np.int32),
            "dealer": spaces.Discrete(4),
        })

        # reset() creates the engine, so it is not constructed a second time here.
        self.reset()

    def reset(self):
        """
        Start a new game on a fresh engine.

        :return: the initial observation (see ``_get_observation``)
        """
        self.engine = ChengduMahjongEngine()
        self.engine.initialize_game()
        self.engine.deal_tiles()
        return self._get_observation()

    def step(self, action):
        """
        Apply one action for the current player and report the outcome.

        :param action: 0-13 discard the hand tile at that index, 14 peng,
            15 gang, 16 win
        :return: ``(obs, reward, done, info)``
        :raises ValueError: if the action is negative, greater than 16, or a
            discard index beyond the current hand size
        """
        current_player = self.engine.state.current_player

        # A negative value would otherwise pass the "< 14" test and silently
        # index the hand from its end.
        if action < 0:
            raise ValueError(f"无效的动作: {action}")

        # 1. Validate and execute the action.
        if action < self._NUM_DISCARD_ACTIONS:
            hand_tiles = self.engine.state.hands[current_player].tiles
            if action >= len(hand_tiles):
                raise ValueError(f"动作 {action} 超出手牌范围")
            tile = hand_tiles[action]
            logger.info(f"玩家 {current_player} 选择打牌: {tile}")
            # NOTE(review): the tile is only offered to the other players here;
            # nothing in this method removes it from the hand — confirm the
            # engine is expected to do that.
            self.engine.check_other_players(tile)
        elif action == self._ACTION_PENG:
            tile_to_peng = self._get_tile_for_special_action("peng")
            if tile_to_peng is not None:
                # NOTE(review): the engine module calls handle_peng/handle_gang/
                # handle_win as imported functions — confirm the engine also
                # exposes them as methods with these argument lists.
                self.engine.handle_peng(current_player, tile_to_peng)
            else:
                logger.warning("碰动作无效,未满足条件")
        elif action == self._ACTION_GANG:
            tile_to_gang = self._get_tile_for_special_action("gang")
            if tile_to_gang is not None:
                # Concealed kong by default.
                self.engine.handle_gang(current_player, tile_to_gang, mode="an")
            else:
                logger.warning("杠动作无效,未满足条件")
        elif action == self._ACTION_WIN:
            state = self.engine.state
            if state.can_win(
                state.hands[current_player],
                state.melds[current_player],
                state.missing_suits[current_player],
            ):
                self.engine.handle_win(current_player, None, None)
            else:
                logger.warning("胡动作无效,未满足条件")
        else:
            raise ValueError(f"无效的动作: {action}")

        # 2. Observation after the action.
        obs = self._get_observation()

        # 3. Reward for the player who acted.
        reward = self._calculate_reward(current_player)

        # 4. Let the engine update its game-over flag, then read it.
        self.engine.check_game_over()
        done = self.engine.game_over

        # 5. Extra information for the caller.
        info = {
            "player": current_player,
            "action": action,
        }
        return obs, reward, done, info

    def _get_observation(self):
        """
        Build the observation for the engine's current player.

        :return: dict with ``hand``, ``melds`` and ``discard_pile`` count
            vectors (length 108, int32) and ``dealer``
        """
        state = self.engine.state
        player_index = state.current_player
        hand = np.zeros(108, dtype=np.int32)
        melds = np.zeros(108, dtype=np.int32)
        discard_pile = np.zeros(108, dtype=np.int32)

        # Each vector is addressed by the tile's ``index`` attribute.
        for tile, count in state.hands[player_index].tile_count.items():
            hand[tile.index] = count
        for meld in state.melds[player_index]:
            melds[meld.tile.index] += meld.count
        for tile in state.discards[player_index]:
            discard_pile[tile.index] += 1

        return {
            "hand": hand,
            "melds": melds,
            "discard_pile": discard_pile,
            # NOTE(review): this carries the current player, not a separate
            # dealer seat — confirm that is intended.
            "dealer": state.current_player,
        }

    def _calculate_reward(self, current_player):
        """
        Reward is the player's score minus a fixed baseline of 100.

        :param current_player: index of the player whose score is read
        :return: ``scores[current_player] - 100``
        """
        return self.engine.state.scores[current_player] - self._REWARD_BASELINE

    def _get_tile_for_special_action(self, action_type):
        """
        Find a tile in the current player's hand that allows a special action.

        :param action_type: "peng", "gang" or "win"
        :return: for "peng" a tile held two or three times, for "gang" a tile
            held four times, for "win" ``True`` when ``can_win`` accepts the
            hand; ``None`` when nothing qualifies
        """
        state = self.engine.state
        player = state.current_player

        if action_type == "peng":
            # A pong needs at least a pair in hand; a triplet qualifies too,
            # in line with the engine's ">= 2" rule. Four copies leave no
            # tile for anyone else to discard.
            # NOTE(review): the discarded tile is not consulted here — confirm.
            for tile, count in state.hands[player].tile_count.items():
                if 2 <= count < 4:
                    return tile
        elif action_type == "gang":
            # All four copies must be in hand.
            for tile, count in state.hands[player].tile_count.items():
                if count == 4:
                    return tile
        elif action_type == "win":
            if state.can_win(
                state.hands[player],
                state.melds[player],
                state.missing_suits[player],
            ):
                return True
        return None
|
||||||
|
|
@ -1,63 +0,0 @@
|
||||||
import gym
|
|
||||||
from gym import spaces
|
|
||||||
import numpy as np
|
|
||||||
from src.engine.chengdu_mahjong_state import ChengduMahjongState
|
|
||||||
|
|
||||||
class ChengduMahjongEnv(gym.Env):
    """
    Gym environment that drives a ``ChengduMahjongState`` directly.

    Five actions are available: 0 discard, 1 pong (peng), 2 kong, 3 win,
    4 pass. Observations are length-108 count vectors for the current player
    plus a ``dealer`` field.
    """

    def __init__(self):
        """Create the state object, declare the spaces and reset the game."""
        super().__init__()
        self.state = ChengduMahjongState()

        # 0: discard, 1: peng, 2: kong, 3: win, 4: pass
        self.action_space = spaces.Discrete(5)

        hand_space = spaces.Box(low=0, high=4, shape=(108,), dtype=np.int32)
        melds_space = spaces.Box(low=0, high=4, shape=(108,), dtype=np.int32)
        discard_space = spaces.Box(low=0, high=4, shape=(108,), dtype=np.int32)
        dealer_space = spaces.Discrete(4)
        self.observation_space = spaces.Dict({
            "hand": hand_space,
            "melds": melds_space,
            "discard_pile": discard_space,
            "dealer": dealer_space,
        })

        self.reset()

    def reset(self):
        """Reset the underlying state and return the first observation."""
        self.state.reset()
        return self._get_observation()

    def step(self, action):
        """
        Forward one action to the state object.

        :param action: 0 discard, 1 peng, 2 kong, 3 win, 4 pass; any other
            value performs no state call
        :return: ``(obs, reward, done, info)`` where ``info`` is always empty
        """
        reward, done = 0, False

        if action == 0:
            self.state.discard()
        elif action == 1:
            self.state.peng()
        elif action == 2:
            self.state.kong()
        elif action == 3:
            # Only a win supplies its own reward and termination flag.
            reward, done = self.state.win()
        elif action == 4:
            self.state.pass_turn()

        # Ask the state whether the game ended only if the action did not end it.
        if not done:
            done = self.state.is_game_over()

        return self._get_observation(), reward, done, {}

    def _get_observation(self):
        """Assemble the observation dictionary for the current player."""
        seat = self.state.current_player
        hand = np.zeros(108, dtype=np.int32)
        melds = np.zeros(108, dtype=np.int32)
        discard_pile = np.zeros(108, dtype=np.int32)

        # Every vector is addressed by the tile's ``index`` attribute.
        for tile, count in self.state.hands[seat].tile_count.items():
            hand[tile.index] = count
        for meld in self.state.melds[seat]:
            melds[meld.tile.index] += meld.count
        for tile in self.state.discards[seat]:
            discard_pile[tile.index] += 1

        observation = {
            "hand": hand,
            "melds": melds,
            "discard_pile": discard_pile,
            "dealer": self.state.current_player,
        }
        return observation
|
|
||||||
Loading…
Reference in New Issue