This commit is contained in:
2024-11-30 19:15:24 +08:00
parent 2a5680fae9
commit 9f7a22be7f
4 changed files with 54 additions and 22 deletions

View File

@@ -1,7 +1,7 @@
import gym
import numpy as np
from gym import spaces
from src.engine.actions import draw_tile, discard_tile, peng, gang, check_blood_battle
from src.engine.calculate_fan import calculate_fan, is_seven_pairs, is_cleared, is_big_pairs
from src.engine.chengdu_mahjong_engine import ChengduMahjongEngine
from src.engine.scoring import calculate_score
@@ -25,19 +25,40 @@ class MahjongEnv(gym.Env):
return self.engine.state.hands[self.engine.state.current_player]
def step(self, action):
"""
每一步游戏,基于玩家的动作更新游戏状态。
action表示玩家的动作可以是摸牌、打牌、碰牌等。
"""
done = False
reward = 0
# 根据action类型选择执行的动作
try:
# 执行玩家动作
self.engine.discard_tile(action)
if action == 0: # 0代表摸牌
reward, done = draw_tile(self.engine)
elif action == 1: # 1代表打牌
tile = self.engine.state.hands[self.engine.state.current_player][0] # 假设选择第一张牌
discard_tile(self.engine, tile)
reward, done = -1, False
elif action == 2: # 2代表碰牌
tile = self.engine.state.hands[self.engine.state.current_player][0] # 假设选择第一张牌
peng(self.engine, tile)
reward, done = 0, False
elif action == 3: # 3代表杠牌
tile = self.engine.state.hands[self.engine.state.current_player][0] # 假设选择第一张牌
gang(self.engine, tile, mode="ming") # 暂时假设为明杠
reward, done = 0, False
# 检查是否胡牌
if self.engine.state.can_win(self.engine.state.hands[self.engine.state.current_player]):
reward, done = self.handle_win()
else:
reward, done = -1, False # 默认小惩罚
# 检查是否有玩家分数 <= 0
if any(score <= 0 for score in self.scores):
# 检查游戏结束条件
check_blood_battle(self.engine)
if self.engine.game_over:
done = True
reward = -100 # 游戏结束的惩罚(可根据需求调整)
except ValueError:
reward, done = -10, False # 非法操作扣分
@@ -62,7 +83,7 @@ class MahjongEnv(gym.Env):
is_self_draw = True # 假设自摸(后续可动态判断)
conditions = {
"is_cleared": is_cleared(hand),
"is_cleared": is_cleared(hand, melds),
"is_seven_pairs": is_seven_pairs(hand),
"is_big_pairs": is_big_pairs(hand),
# 添加其他条件...
@@ -79,10 +100,11 @@ class MahjongEnv(gym.Env):
# 奖励设置为赢家得分
reward = scores["winner"]
done = True # 胡牌结束当前局
return reward, done
self.engine.state.winners.append(winner) # 添加赢家到列表
# 如果有玩家分数 <= 0可进行其他处理如记录惩罚或结束游戏
if any(score <= 0 for score in self.scores):
self.engine.game_over = True # 设置游戏结束标志
return reward, True # 胡牌结束当前局
def render(self, mode="human"):
print(f"当前轮数: {self.current_round}")
print("玩家分数:", self.scores)
print("当前玩家状态:", self.engine.state.hands[self.engine.state.current_player])