1
pull/1/head
wsy182 2024-11-30 20:02:04 +08:00
parent b78d6a17a4
commit 9d95edfa11
4 changed files with 22 additions and 11 deletions

View File

@ -7,7 +7,7 @@ def train_model():
env = MahjongEnv() env = MahjongEnv()
# 使用 PPO 算法训练模型 # 使用 PPO 算法训练模型
model = PPO("MlpPolicy", env, verbose=1, tensorboard_log="./ppo_mahjong_tensorboard/") model = PPO("MlpPolicy", env, verbose=1, tensorboard_log="../logs/ppo_mahjong_tensorboard/")
# 训练模型，训练总步数为100000 # 训练模型，训练总步数为100000
model.learn(total_timesteps=100000) model.learn(total_timesteps=100000)

View File

@ -2,24 +2,26 @@ from loguru import logger
from src.engine.utils import get_tile_name from src.engine.utils import get_tile_name
def draw_tile(self): def draw_tile(engine):
""" """
当前玩家摸牌逻辑，记录牌的详细信息和游戏状态
""" """
if self.state.remaining_tiles == 0: if engine.state.remaining_tiles == 0:
logger.warning("牌堆已空,游戏结束!") logger.warning("牌堆已空,游戏结束!")
self.game_over = True engine.game_over = True
return "牌堆已空" return 0, True # 游戏结束时返回 0 和 done = True
tile = self.state.deck.pop(0) # 从牌堆中取出一张牌 tile = engine.state.deck.pop(0) # 从牌堆中取出一张牌
self.state.remaining_tiles -= 1 # 更新剩余牌数 engine.state.remaining_tiles -= 1 # 更新剩余牌数
self.state.hands[self.state.current_player][tile] += 1 # 加入当前玩家手牌 engine.state.hands[engine.state.current_player][tile] += 1 # 加入当前玩家手牌
tile_name = get_tile_name(tile) # 获取具体的牌名 tile_name = get_tile_name(tile) # 获取具体的牌名
logger.info( logger.info(
f"玩家 {self.state.current_player} 摸到一张牌: {tile_name}(索引 {tile})。剩余牌堆数量: {self.state.remaining_tiles}" f"玩家 {engine.state.current_player} 摸到一张牌: {tile_name}(索引 {tile})。剩余牌堆数量: {engine.state.remaining_tiles}"
) )
return tile
# 返回奖励和游戏是否结束的标志
return 0, False # 奖励为 0，done 为 False，游戏继续
def discard_tile(self, tile): def discard_tile(self, tile):
@ -66,15 +68,22 @@ def gang(self, tile, mode):
self.state.hands[player][tile] -= 3 self.state.hands[player][tile] -= 3
self.state.melds[player].append(("ming_gang", tile)) self.state.melds[player].append(("ming_gang", tile))
logger.info(f"玩家 {player} 明杠: {tile_name}(索引 {tile}") logger.info(f"玩家 {player} 明杠: {tile_name}(索引 {tile}")
self.state.scores[player] += 1 # 奖励1分
logger.info(f"玩家 {player} 因明杠获得1分")
elif mode == "an" and self.state.hands[player][tile] == 4: elif mode == "an" and self.state.hands[player][tile] == 4:
self.state.hands[player][tile] -= 4 self.state.hands[player][tile] -= 4
self.state.melds[player].append(("an_gang", tile)) self.state.melds[player].append(("an_gang", tile))
logger.info(f"玩家 {player} 暗杠: {tile_name}(索引 {tile}") logger.info(f"玩家 {player} 暗杠: {tile_name}(索引 {tile}")
self.state.scores[player] += 1 # 奖励1分
logger.info(f"玩家 {player} 因暗杠获得1分")
else: else:
logger.error(f"玩家 {player} 尝试杠牌失败: {tile_name}(索引 {tile}),条件不满足") logger.error(f"玩家 {player} 尝试杠牌失败: {tile_name}(索引 {tile}),条件不满足")
raise ValueError("杠牌条件不满足") raise ValueError("杠牌条件不满足")
def check_blood_battle(self): def check_blood_battle(self):
""" """
检查游戏是否流局或血战结束，记录状态 检查游戏是否流局或血战结束，记录状态

View File

@ -15,7 +15,7 @@ class ChengduMahjongState:
# 当前玩家索引 # 当前玩家索引
self.current_player = 0 self.current_player = 0
# 玩家分数 # 玩家分数
self.scores = 0 self.scores = [100, 100, 100, 100]
# 剩余牌数量 # 剩余牌数量
self.remaining_tiles = 108 self.remaining_tiles = 108
# 胜利玩家列表 # 胜利玩家列表

2
test.py Normal file
View File

@ -0,0 +1,2 @@
import torch
print(torch.cuda.is_available()) # 如果返回True说明可以使用GPU