parent
7632edd0e3
commit
14c811f6b9
|
|
@ -142,4 +142,5 @@
|
|||
|
||||
## 成都麻将规则建模
|
||||
|
||||
麻将游戏引擎建模代码于项目根src/engine/目录下
|
||||
麻将游戏引擎的建模代码位于项目根目录下的 `src/engine/` 目录中。
|
||||
|
||||
|
|
|
|||
|
|
@ -14,6 +14,8 @@ class ChengduMahjongState:
|
|||
self.deck = list(range(108)) # 0-107 表示108张牌
|
||||
# 当前玩家索引
|
||||
self.current_player = 0
|
||||
# 玩家分数
|
||||
self.scores = 0
|
||||
# 剩余牌数量
|
||||
self.remaining_tiles = 108
|
||||
# 胜利玩家列表
|
||||
|
|
|
|||
|
|
@ -1,34 +1,70 @@
|
|||
import gym
|
||||
from gym import spaces
|
||||
import numpy as np
|
||||
from gym import spaces
|
||||
|
||||
|
||||
from src.engine.calculate_fan import calculate_fan, is_seven_pairs, is_cleared, is_big_pairs
|
||||
from src.engine.chengdu_mahjong_engine import ChengduMahjongEngine
|
||||
from src.engine.scoring import calculate_score
|
||||
|
||||
|
||||
class MahjongEnv(gym.Env):
    """Gym environment that drives a ``ChengduMahjongEngine``.

    An action is an integer in ``[0, 108)`` that is forwarded to the engine's
    ``discard_tile``.  The observation returned by ``reset`` and ``step`` is
    whatever the engine stores in ``state.hands`` for the current player.

    NOTE(review): the returned hand is not validated against
    ``observation_space`` (a ``Box`` of shape ``(108,)``) -- confirm that the
    engine's hand representation matches it.
    """

    # Seats at the table and the score every seat starts a round with.
    NUM_PLAYERS = 4
    INITIAL_SCORE = 100

    def __init__(self):
        """Create a fresh engine, the score table and the gym spaces."""
        super().__init__()
        self.engine = ChengduMahjongEngine()
        self.scores = [self.INITIAL_SCORE] * self.NUM_PLAYERS
        self.base_score = 1  # base stake passed to calculate_score
        # Action space: index of the tile to discard.
        self.action_space = spaces.Discrete(108)
        self.observation_space = spaces.Box(low=0, high=4, shape=(108,), dtype=np.int32)

    def reset(self):
        """Start a new round and return the current player's hand.

        The engine is rebuilt from scratch and every score is reset to
        ``INITIAL_SCORE``.
        """
        self.engine = ChengduMahjongEngine()
        self.scores = [self.INITIAL_SCORE] * self.NUM_PLAYERS
        return self.engine.state.hands[self.engine.state.current_player]

    def step(self, action):
        """Discard ``action`` and report the outcome.

        Args:
            action: tile index handed to ``engine.discard_tile``.

        Returns:
            A ``(observation, reward, done, info)`` tuple.  ``reward`` is the
            winner's settlement score when the current player can win, ``-1``
            for an ordinary move and ``-10`` when a ``ValueError`` is raised
            (treated as an illegal move).  ``done`` is ``True`` only on a
            win.  ``info`` is always an empty dict.
        """
        reward, done = 0, False
        try:
            self.engine.discard_tile(action)
            state = self.engine.state
            if state.can_win(state.hands[state.current_player]):
                reward, done = self.handle_win()
            else:
                reward, done = -1, False  # small per-step penalty
        except ValueError:
            reward, done = -10, False  # penalty for an illegal move

        state = self.engine.state
        return state.hands[state.current_player], reward, done, {}

    def calculate_reward(self):
        """Return 100 if the current player can win, otherwise -1.

        Kept for backward compatibility; ``step`` derives its reward from
        ``handle_win`` instead of calling this method.
        """
        state = self.engine.state
        if state.can_win(state.hands[state.current_player]):
            return 100  # winning reward
        return -1  # default small per-step penalty

    def handle_win(self):
        """Settle scores after a win and return ``(reward, done)``.

        The fan count is computed with ``calculate_fan`` and converted to
        score deltas with ``calculate_score``; the deltas are applied to
        ``self.scores``.  The reward is the winner's delta and ``done`` is
        always ``True``.
        """
        state = self.engine.state
        winner = state.current_player
        hand = state.hands[winner]
        melds = state.melds[winner]
        is_self_draw = True  # assumed self-draw for now (to be made dynamic)

        # Must not be named ``is_cleared``: assigning to that name would make
        # it local to this method and the call below would raise
        # UnboundLocalError instead of reaching the imported helper.
        hand_is_cleared = is_cleared(hand)
        conditions = {
            "is_seven_pairs": is_seven_pairs(hand),
            "is_big_pairs": is_big_pairs(hand),
            # add further conditions here...
        }

        fan = calculate_fan(hand, melds, is_self_draw, hand_is_cleared, conditions)
        scores = calculate_score(fan, self.base_score, is_self_draw)

        self.scores[winner] += scores["winner"]

        loser_deltas = list(scores["loser"])
        losers = [seat for seat in range(len(self.scores)) if seat != winner]
        if len(loser_deltas) == len(losers):
            # One delta per losing seat: skip the winner so they are never
            # debited and the last seat is not missed.
            targets = losers
        else:
            # NOTE(review): presumably a per-seat list here -- confirm against
            # calculate_score.  Falls back to the original positional mapping.
            targets = range(len(loser_deltas))
        for seat, delta in zip(targets, loser_deltas):
            self.scores[seat] += delta  # deduction for the loser

        return scores["winner"], True

    def render(self, mode="human"):
        """Print all hands, the score table and the current player's hand."""
        print("当前状态:", self.engine.state.hands)
        print("当前分数:", self.scores)
        print("当前状态:", self.engine.state.hands[self.engine.state.current_player])
|
||||
|
|
|
|||
Loading…
Reference in New Issue