@@ -1,34 +1,70 @@
import gym
from gym import spaces
import numpy as np
from gym import spaces
from src . engine . calculate_fan import calculate_fan , is_seven_pairs , is_cleared , is_big_pairs
from src . engine . chengdu_mahjong_engine import ChengduMahjongEngine
from src . engine . scoring import calculate_score
class MahjongEnv(gym.Env):
    """Gym environment wrapping ``ChengduMahjongEngine``.

    An action is the index of the tile to discard (``Discrete(108)``).
    Each ``step`` discards one tile for the engine's current player, then
    checks whether that player's hand is a winning hand; a win settles the
    scores and ends the episode.

    Attributes:
        engine: The ``ChengduMahjongEngine`` instance driving the game.
        scores: Running score of each of the four players.
        base_score: Base stake passed to ``calculate_score``.
    """

    NUM_PLAYERS = 4
    INITIAL_SCORE = 100

    def __init__(self):
        super().__init__()
        self.engine = ChengduMahjongEngine()
        # Every player starts with the same score.
        self.scores = [self.INITIAL_SCORE] * self.NUM_PLAYERS
        self.base_score = 1  # base stake
        # Action space: index of the tile to discard.
        self.action_space = spaces.Discrete(108)
        self.observation_space = spaces.Box(
            low=0, high=4, shape=(108,), dtype=np.int32
        )

    def _current_hand(self):
        """Return the hand of the engine's current player."""
        state = self.engine.state
        return state.hands[state.current_player]

    def reset(self):
        """Start a new game and return the current player's hand.

        A fresh engine is created and all scores are reset.
        """
        self.engine = ChengduMahjongEngine()
        self.scores = [self.INITIAL_SCORE] * self.NUM_PLAYERS
        # NOTE(review): the hand is returned as stored by the engine; confirm
        # that it matches the declared Box(108,) observation space.
        return self._current_hand()

    def step(self, action):
        """Discard the tile selected by ``action`` and evaluate the result.

        Args:
            action: Index of the tile to discard, forwarded to
                ``engine.discard_tile``.

        Returns:
            A ``(observation, reward, done, info)`` tuple. The observation is
            the current player's hand. The reward is the winner's score on a
            win, ``-1`` for an ordinary step and ``-10`` when a ``ValueError``
            is raised. ``done`` is ``True`` only on a win. ``info`` is always
            an empty dict.
        """
        try:
            # Execute the player's action.
            self.engine.discard_tile(action)
            # Check whether the current player's hand is now a winning hand.
            if self.engine.state.can_win(self._current_hand()):
                reward, done = self.handle_win()
            else:
                reward, done = -1, False  # small default per-step penalty
        except ValueError:
            # Illegal action: penalise and keep the episode running.
            # NOTE(review): the try also covers win detection and scoring, so
            # a ValueError raised there is reported as an illegal move too.
            reward, done = -10, False
        return self._current_hand(), reward, done, {}

    def calculate_reward(self):
        """Return ``100`` if the current player's hand can win, else ``-1``."""
        if self.engine.state.can_win(self._current_hand()):
            return 100  # reward for a winning hand
        return -1  # small default per-step penalty

    def handle_win(self):
        """Settle scores after a win and return ``(reward, done)``.

        The engine's current player is treated as the winner. The fan count
        and the score changes are computed from the winner's hand and melds,
        then applied to ``self.scores``.

        Returns:
            ``(reward, True)`` where ``reward`` is the winner's score gain.
        """
        state = self.engine.state
        winner = state.current_player
        hand = state.hands[winner]
        melds = state.melds[winner]
        is_self_draw = True  # assumed self-draw; to be determined dynamically later
        # The local must not be called ``is_cleared``: assigning to that name
        # here would hide the imported function and raise UnboundLocalError.
        cleared = is_cleared(hand)
        conditions = {
            "is_seven_pairs": is_seven_pairs(hand),
            "is_big_pairs": is_big_pairs(hand),
            # add further conditions here
        }
        # Compute the fan count for this hand.
        fan = calculate_fan(hand, melds, is_self_draw, cleared, conditions)
        # Compute the resulting score changes.
        scores = calculate_score(fan, self.base_score, is_self_draw)
        self.scores[winner] += scores["winner"]
        # Apply the losers' deductions (presumably negative deltas).
        # NOTE(review): entries are applied by position starting at player 0;
        # confirm that scores["loser"] is indexed by player and does not also
        # adjust the winner.
        for i, score in enumerate(scores["loser"]):
            self.scores[i] += score
        # The reward is the winner's score gain; a win ends the game.
        return scores["winner"], True

    def render(self, mode="human"):
        """Print all hands, the scores, and the current player's hand."""
        print("当前状态:", self.engine.state.hands)
        print("当前分数:", self.scores)
        print("当前状态:", self._current_hand())