From 9d95edfa116bbfc54e7e894209f7fbe2c49098f1 Mon Sep 17 00:00:00 2001 From: wsy182 <2392948297@qq.com> Date: Sat, 30 Nov 2024 20:02:04 +0800 Subject: [PATCH] 1 1 --- scripts/train_chengdu_mahjong_model.py | 2 +- src/engine/actions.py | 27 +++++++++++++++++--------- src/engine/game_state.py | 2 +- test.py | 2 ++ 4 files changed, 22 insertions(+), 11 deletions(-) create mode 100644 test.py diff --git a/scripts/train_chengdu_mahjong_model.py b/scripts/train_chengdu_mahjong_model.py index 4ad8138..7afbafb 100644 --- a/scripts/train_chengdu_mahjong_model.py +++ b/scripts/train_chengdu_mahjong_model.py @@ -7,7 +7,7 @@ def train_model(): env = MahjongEnv() # 使用 PPO 算法训练模型 - model = PPO("MlpPolicy", env, verbose=1, tensorboard_log="./ppo_mahjong_tensorboard/") + model = PPO("MlpPolicy", env, verbose=1, tensorboard_log="../logs/ppo_mahjong_tensorboard/") # 训练模型,训练总步数为100000 model.learn(total_timesteps=100000) diff --git a/src/engine/actions.py b/src/engine/actions.py index 2659435..f1ccbbe 100644 --- a/src/engine/actions.py +++ b/src/engine/actions.py @@ -2,24 +2,26 @@ from loguru import logger from src.engine.utils import get_tile_name -def draw_tile(self): +def draw_tile(engine): """ 当前玩家摸牌逻辑,记录牌的详细信息和游戏状态。 """ - if self.state.remaining_tiles == 0: + if engine.state.remaining_tiles == 0: logger.warning("牌堆已空,游戏结束!") - self.game_over = True - return "牌堆已空" + engine.game_over = True + return 0, True # 游戏结束时返回 0 和 done = True - tile = self.state.deck.pop(0) # 从牌堆中取出一张牌 - self.state.remaining_tiles -= 1 # 更新剩余牌数 - self.state.hands[self.state.current_player][tile] += 1 # 加入当前玩家手牌 + tile = engine.state.deck.pop(0) # 从牌堆中取出一张牌 + engine.state.remaining_tiles -= 1 # 更新剩余牌数 + engine.state.hands[engine.state.current_player][tile] += 1 # 加入当前玩家手牌 tile_name = get_tile_name(tile) # 获取具体的牌名 logger.info( - f"玩家 {self.state.current_player} 摸到一张牌: {tile_name}(索引 {tile})。剩余牌堆数量: {self.state.remaining_tiles}" + f"玩家 {engine.state.current_player} 摸到一张牌: {tile_name}(索引 {tile})。剩余牌堆数量: {engine.state.remaining_tiles}" ) - return tile + + # 返回奖励和游戏是否结束的标志 + return 0, False # 奖励为 0,done 为 False(游戏继续) def discard_tile(self, tile): @@ -66,15 +68,22 @@ def gang(self, tile, mode): self.state.hands[player][tile] -= 3 self.state.melds[player].append(("ming_gang", tile)) logger.info(f"玩家 {player} 明杠: {tile_name}(索引 {tile})") + self.state.scores[player] += 1 # 奖励1分 + logger.info(f"玩家 {player} 因明杠获得1分") + elif mode == "an" and self.state.hands[player][tile] == 4: self.state.hands[player][tile] -= 4 self.state.melds[player].append(("an_gang", tile)) logger.info(f"玩家 {player} 暗杠: {tile_name}(索引 {tile})") + self.state.scores[player] += 1 # 奖励1分 + logger.info(f"玩家 {player} 因暗杠获得1分") + else: logger.error(f"玩家 {player} 尝试杠牌失败: {tile_name}(索引 {tile}),条件不满足") raise ValueError("杠牌条件不满足") + def check_blood_battle(self): """ 检查游戏是否流局或血战结束,记录状态。 diff --git a/src/engine/game_state.py b/src/engine/game_state.py index 5ec0303..6aa58b0 100644 --- a/src/engine/game_state.py +++ b/src/engine/game_state.py @@ -15,7 +15,7 @@ class ChengduMahjongState: # 当前玩家索引 self.current_player = 0 # 玩家分数 - self.scores = 0 + self.scores = [100, 100, 100, 100] # 剩余牌数量 self.remaining_tiles = 108 # 胜利玩家列表 diff --git a/test.py b/test.py new file mode 100644 index 0000000..581416c --- /dev/null +++ b/test.py @@ -0,0 +1,2 @@ +import torch +print(torch.cuda.is_available()) # 如果返回True,说明可以使用GPU