1
1
This commit is contained in:
@@ -1,120 +1,63 @@
|
||||
import gym
|
||||
import numpy as np
|
||||
from gym import spaces
|
||||
from src.engine.actions import draw_tile, discard_tile, peng, gang, check_blood_battle
|
||||
from src.engine.calculate_fan import calculate_fan, is_seven_pairs, is_cleared, is_big_pairs
|
||||
from src.engine.chengdu_mahjong_engine import ChengduMahjongEngine
|
||||
from src.engine.scoring import calculate_score
|
||||
import numpy as np
|
||||
from src.engine.chengdu_mahjong_state import ChengduMahjongState
|
||||
|
||||
|
||||
class MahjongEnv(gym.Env):
|
||||
class ChengduMahjongEnv(gym.Env):
|
||||
def __init__(self):
|
||||
super(MahjongEnv, self).__init__()
|
||||
self.engine = ChengduMahjongEngine()
|
||||
self.scores = [100, 100, 100, 100] # 四位玩家初始分数
|
||||
self.base_score = 1 # 底分
|
||||
self.max_rounds = 100 # 最大轮数,防止游戏无限进行
|
||||
self.current_round = 0 # 当前轮数
|
||||
self.action_space = spaces.Discrete(108) # 动作空间:打牌的索引
|
||||
self.observation_space = spaces.Box(low=0, high=4, shape=(108,), dtype=np.int32)
|
||||
super().__init__()
|
||||
self.state = ChengduMahjongState()
|
||||
self.action_space = spaces.Discrete(5) # 0: 出牌, 1: 碰, 2: 杠, 3: 胡, 4: 过
|
||||
self.observation_space = spaces.Dict({
|
||||
"hand": spaces.Box(low=0, high=4, shape=(108,), dtype=np.int32), # 手牌数量
|
||||
"melds": spaces.Box(low=0, high=4, shape=(108,), dtype=np.int32), # 明牌数量
|
||||
"discard_pile": spaces.Box(low=0, high=4, shape=(108,), dtype=np.int32), # 弃牌数量
|
||||
"dealer": spaces.Discrete(4), # 当前庄家
|
||||
})
|
||||
self.reset()
|
||||
|
||||
def reset(self):
|
||||
self.engine = ChengduMahjongEngine()
|
||||
self.scores = [100, 100, 100, 100] # 每局重置分数
|
||||
self.current_round = 0
|
||||
return self.engine.state.hands[self.engine.state.current_player]
|
||||
"""重置游戏状态"""
|
||||
self.state.reset() # 初始化游戏状态
|
||||
return self._get_observation()
|
||||
|
||||
def step(self, action):
|
||||
"""
|
||||
执行玩家动作并更新游戏状态。
|
||||
|
||||
参数:
|
||||
- action: 玩家动作,0 代表摸牌,1 代表打牌,2 代表碰牌,3 代表杠牌
|
||||
|
||||
返回:
|
||||
- next_state: 当前玩家的手牌
|
||||
- reward: 奖励
|
||||
- done: 是否结束
|
||||
- info: 其他信息(如奖励历史等)
|
||||
"""
|
||||
done = False
|
||||
reward = 0
|
||||
done = False
|
||||
|
||||
try:
|
||||
if action == 0: # 0代表摸牌
|
||||
reward, done = draw_tile(self.engine) # 调用摸牌函数
|
||||
elif action == 1: # 1代表打牌
|
||||
tile = self.engine.state.hands[self.engine.state.current_player][0] # 假设选择第一张牌
|
||||
discard_tile(self.engine, tile) # 调用打牌函数
|
||||
reward, done = -1, False
|
||||
elif action == 2: # 2代表碰牌
|
||||
tile = self.engine.state.hands[self.engine.state.current_player][0] # 假设选择第一张牌
|
||||
peng(self.engine, tile) # 调用碰牌函数
|
||||
reward, done = 0, False
|
||||
elif action == 3: # 3代表杠牌
|
||||
tile = self.engine.state.hands[self.engine.state.current_player][0] # 假设选择第一张牌
|
||||
gang(self.engine, tile, mode="ming") # 暂时假设为明杠
|
||||
reward, done = 0, False
|
||||
if action == 0: # 出牌
|
||||
self.state.discard()
|
||||
elif action == 1: # 碰
|
||||
self.state.peng()
|
||||
elif action == 2: # 杠
|
||||
self.state.kong()
|
||||
elif action == 3: # 胡
|
||||
reward, done = self.state.win()
|
||||
elif action == 4: # 过
|
||||
self.state.pass_turn()
|
||||
|
||||
# 检查是否胡牌
|
||||
if self.engine.state.can_win(self.engine.state.hands[self.engine.state.current_player]):
|
||||
reward, done = self.handle_win() # 胡牌时处理胜利逻辑
|
||||
# 检查游戏是否结束
|
||||
done = done or self.state.is_game_over()
|
||||
return self._get_observation(), reward, done, {}
|
||||
|
||||
# 检查游戏结束条件
|
||||
check_blood_battle(self.engine)
|
||||
def _get_observation(self):
|
||||
"""获取玩家当前的观察空间"""
|
||||
player_index = self.state.current_player
|
||||
hand = np.zeros(108, dtype=np.int32)
|
||||
melds = np.zeros(108, dtype=np.int32)
|
||||
discard_pile = np.zeros(108, dtype=np.int32)
|
||||
|
||||
if self.engine.game_over: # 检查是否游戏结束
|
||||
done = True
|
||||
|
||||
except ValueError:
|
||||
reward, done = -10, False # 非法操作扣分
|
||||
|
||||
# 切换到下一个玩家
|
||||
self.engine.state.current_player = (self.engine.state.current_player + 1) % 4
|
||||
self.current_round += 1
|
||||
|
||||
# 如果达到最大轮数,结束游戏
|
||||
if self.current_round >= self.max_rounds:
|
||||
done = True
|
||||
reward = 0 # 平局奖励或惩罚(可调整)
|
||||
|
||||
return self.engine.state.hands[self.engine.state.current_player], reward, done, {}
|
||||
|
||||
def handle_win(self):
|
||||
"""
|
||||
处理胡牌后的分数结算和奖励。
|
||||
"""
|
||||
winner = self.engine.state.current_player
|
||||
hand = self.engine.state.hands[winner]
|
||||
melds = self.engine.state.melds[winner]
|
||||
is_self_draw = True # 假设自摸(后续可动态判断)
|
||||
|
||||
conditions = {
|
||||
"is_cleared": is_cleared(hand, melds),
|
||||
"is_seven_pairs": is_seven_pairs(hand),
|
||||
"is_big_pairs": is_big_pairs(hand),
|
||||
# 添加其他条件...
|
||||
}
|
||||
|
||||
# 动态计算番数
|
||||
fan = calculate_fan(hand, melds, is_self_draw, is_cleared, conditions)
|
||||
|
||||
# 动态计算得分
|
||||
scores = calculate_score(fan, self.base_score, is_self_draw)
|
||||
self.scores[winner] += scores["winner"]
|
||||
for i, score in enumerate(scores["loser"]):
|
||||
self.scores[i] += score # 扣分
|
||||
|
||||
# 奖励设置为赢家得分
|
||||
reward = scores["winner"]
|
||||
self.engine.state.winners.append(winner) # 添加赢家到列表
|
||||
return reward, True # 胡牌结束当前局
|
||||
|
||||
def render(self, mode="human"):
|
||||
"""
|
||||
打印游戏状态信息,便于调试。
|
||||
"""
|
||||
print(f"当前轮数: {self.current_round}")
|
||||
print("玩家分数:", self.scores)
|
||||
print("当前玩家状态:", self.engine.state.hands[self.engine.state.current_player])
|
||||
# 填充手牌、明牌和弃牌信息
|
||||
for tile, count in self.state.hands[player_index].tile_count.items():
|
||||
hand[tile.index] = count
|
||||
for meld in self.state.melds[player_index]:
|
||||
melds[meld.tile.index] += meld.count
|
||||
for tile in self.state.discards[player_index]:
|
||||
discard_pile[tile.index] += 1
|
||||
|
||||
return {
|
||||
"hand": hand,
|
||||
"melds": melds,
|
||||
"discard_pile": discard_pile,
|
||||
"dealer": self.state.current_player
|
||||
}
|
||||
Reference in New Issue
Block a user