parent
b78d6a17a4
commit
9d95edfa11
|
|
@ -7,7 +7,7 @@ def train_model():
|
||||||
env = MahjongEnv()
|
env = MahjongEnv()
|
||||||
|
|
||||||
# 使用 PPO 算法训练模型
|
# 使用 PPO 算法训练模型
|
||||||
model = PPO("MlpPolicy", env, verbose=1, tensorboard_log="./ppo_mahjong_tensorboard/")
|
model = PPO("MlpPolicy", env, verbose=1, tensorboard_log="../logs/ppo_mahjong_tensorboard/")
|
||||||
|
|
||||||
# 训练模型,训练总步数为100000
|
# 训练模型,训练总步数为100000
|
||||||
model.learn(total_timesteps=100000)
|
model.learn(total_timesteps=100000)
|
||||||
|
|
|
||||||
|
|
@ -2,24 +2,26 @@ from loguru import logger
|
||||||
from src.engine.utils import get_tile_name
|
from src.engine.utils import get_tile_name
|
||||||
|
|
||||||
|
|
||||||
def draw_tile(self):
|
def draw_tile(engine):
|
||||||
"""
|
"""
|
||||||
当前玩家摸牌逻辑,记录牌的详细信息和游戏状态。
|
当前玩家摸牌逻辑,记录牌的详细信息和游戏状态。
|
||||||
"""
|
"""
|
||||||
if self.state.remaining_tiles == 0:
|
if engine.state.remaining_tiles == 0:
|
||||||
logger.warning("牌堆已空,游戏结束!")
|
logger.warning("牌堆已空,游戏结束!")
|
||||||
self.game_over = True
|
engine.game_over = True
|
||||||
return "牌堆已空"
|
return 0, True # 游戏结束时返回 0 和 done = True
|
||||||
|
|
||||||
tile = self.state.deck.pop(0) # 从牌堆中取出一张牌
|
tile = engine.state.deck.pop(0) # 从牌堆中取出一张牌
|
||||||
self.state.remaining_tiles -= 1 # 更新剩余牌数
|
engine.state.remaining_tiles -= 1 # 更新剩余牌数
|
||||||
self.state.hands[self.state.current_player][tile] += 1 # 加入当前玩家手牌
|
engine.state.hands[engine.state.current_player][tile] += 1 # 加入当前玩家手牌
|
||||||
|
|
||||||
tile_name = get_tile_name(tile) # 获取具体的牌名
|
tile_name = get_tile_name(tile) # 获取具体的牌名
|
||||||
logger.info(
|
logger.info(
|
||||||
f"玩家 {self.state.current_player} 摸到一张牌: {tile_name}(索引 {tile})。剩余牌堆数量: {self.state.remaining_tiles}"
|
f"玩家 {engine.state.current_player} 摸到一张牌: {tile_name}(索引 {tile})。剩余牌堆数量: {engine.state.remaining_tiles}"
|
||||||
)
|
)
|
||||||
return tile
|
|
||||||
|
# 返回奖励和游戏是否结束的标志
|
||||||
|
return 0, False # 奖励为 0,done 为 False(游戏继续)
|
||||||
|
|
||||||
|
|
||||||
def discard_tile(self, tile):
|
def discard_tile(self, tile):
|
||||||
|
|
@ -66,15 +68,22 @@ def gang(self, tile, mode):
|
||||||
self.state.hands[player][tile] -= 3
|
self.state.hands[player][tile] -= 3
|
||||||
self.state.melds[player].append(("ming_gang", tile))
|
self.state.melds[player].append(("ming_gang", tile))
|
||||||
logger.info(f"玩家 {player} 明杠: {tile_name}(索引 {tile})")
|
logger.info(f"玩家 {player} 明杠: {tile_name}(索引 {tile})")
|
||||||
|
self.state.scores[player] += 1 # 奖励1分
|
||||||
|
logger.info(f"玩家 {player} 因明杠获得1分")
|
||||||
|
|
||||||
elif mode == "an" and self.state.hands[player][tile] == 4:
|
elif mode == "an" and self.state.hands[player][tile] == 4:
|
||||||
self.state.hands[player][tile] -= 4
|
self.state.hands[player][tile] -= 4
|
||||||
self.state.melds[player].append(("an_gang", tile))
|
self.state.melds[player].append(("an_gang", tile))
|
||||||
logger.info(f"玩家 {player} 暗杠: {tile_name}(索引 {tile})")
|
logger.info(f"玩家 {player} 暗杠: {tile_name}(索引 {tile})")
|
||||||
|
self.state.scores[player] += 1 # 奖励1分
|
||||||
|
logger.info(f"玩家 {player} 因暗杠获得1分")
|
||||||
|
|
||||||
else:
|
else:
|
||||||
logger.error(f"玩家 {player} 尝试杠牌失败: {tile_name}(索引 {tile}),条件不满足")
|
logger.error(f"玩家 {player} 尝试杠牌失败: {tile_name}(索引 {tile}),条件不满足")
|
||||||
raise ValueError("杠牌条件不满足")
|
raise ValueError("杠牌条件不满足")
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def check_blood_battle(self):
|
def check_blood_battle(self):
|
||||||
"""
|
"""
|
||||||
检查游戏是否流局或血战结束,记录状态。
|
检查游戏是否流局或血战结束,记录状态。
|
||||||
|
|
|
||||||
|
|
@ -15,7 +15,7 @@ class ChengduMahjongState:
|
||||||
# 当前玩家索引
|
# 当前玩家索引
|
||||||
self.current_player = 0
|
self.current_player = 0
|
||||||
# 玩家分数
|
# 玩家分数
|
||||||
self.scores = 0
|
self.scores = [100, 100, 100, 100]
|
||||||
# 剩余牌数量
|
# 剩余牌数量
|
||||||
self.remaining_tiles = 108
|
self.remaining_tiles = 108
|
||||||
# 胜利玩家列表
|
# 胜利玩家列表
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue