1
This commit is contained in:
2024-11-30 18:28:19 +08:00
parent 14c811f6b9
commit 2a5680fae9
3 changed files with 92 additions and 122 deletions

View File

@@ -2,7 +2,6 @@ import gym
import numpy as np
from gym import spaces
from src.engine.calculate_fan import calculate_fan, is_seven_pairs, is_cleared, is_big_pairs
from src.engine.chengdu_mahjong_engine import ChengduMahjongEngine
from src.engine.scoring import calculate_score
@@ -14,12 +13,15 @@ class MahjongEnv(gym.Env):
self.engine = ChengduMahjongEngine()
self.scores = [100, 100, 100, 100] # 四位玩家初始分数
self.base_score = 1 # 底分
self.max_rounds = 100 # 最大轮数,防止游戏无限进行
self.current_round = 0 # 当前轮数
self.action_space = spaces.Discrete(108) # 动作空间:打牌的索引
self.observation_space = spaces.Box(low=0, high=4, shape=(108,), dtype=np.int32)
def reset(self):
    """Start a fresh game and return the current player's hand.

    A new ``ChengduMahjongEngine`` replaces the existing engine, every
    player's score is restored to 100, and the round counter goes back
    to zero.

    Returns:
        The entry of ``engine.state.hands`` that belongs to the engine's
        current player.
    """
    self.engine = ChengduMahjongEngine()
    # Scores are reset at the start of every game (four players, 100 each).
    self.scores = [100] * 4
    self.current_round = 0

    state = self.engine.state
    return state.hands[state.current_player]
def step(self, action):
@@ -31,9 +33,23 @@ class MahjongEnv(gym.Env):
reward, done = self.handle_win()
else:
reward, done = -1, False # 默认小惩罚
# 检查是否有玩家分数 <= 0
if any(score <= 0 for score in self.scores):
done = True
reward = -100 # 游戏结束的惩罚(可根据需求调整)
except ValueError:
reward, done = -10, False # 非法操作扣分
# 切换到下一个玩家
self.engine.state.current_player = (self.engine.state.current_player + 1) % 4
self.current_round += 1
# 如果达到最大轮数,结束游戏
if self.current_round >= self.max_rounds:
done = True
reward = 0 # 平局奖励或惩罚(可调整)
return self.engine.state.hands[self.engine.state.current_player], reward, done, {}
def handle_win(self):
@@ -44,8 +60,9 @@ class MahjongEnv(gym.Env):
hand = self.engine.state.hands[winner]
melds = self.engine.state.melds[winner]
is_self_draw = True # 假设自摸(后续可动态判断)
is_cleared = is_cleared(hand)
conditions = {
"is_cleared": is_cleared(hand),
"is_seven_pairs": is_seven_pairs(hand),
"is_big_pairs": is_big_pairs(hand),
# 添加其他条件...
@@ -62,9 +79,10 @@ class MahjongEnv(gym.Env):
# 奖励设置为赢家得分
reward = scores["winner"]
done = True # 游戏结束
done = True # 胡牌结束当前局
return reward, done
def render(self, mode="human"):
    """Print a human-readable snapshot of the environment to stdout.

    Emits three lines: the current round counter, every player's score,
    and the hand of the engine's current player.

    The earlier pair of prints (scores labelled "当前数:" and the hand
    labelled "当前状态:") was superseded by the three prints below; keeping
    both sets printed the scores and the hand twice and reused the round
    label for the score list.

    Args:
        mode: Rendering mode. Accepted for interface compatibility; the
            value is not inspected and output always goes to stdout.
    """
    state = self.engine.state
    print(f"当前数: {self.current_round}")
    print("玩家分数:", self.scores)
    print("当前玩家状态:", state.hands[state.current_player])