Compare commits
2 Commits
9f7a22be7f
...
b78d6a17a4
| Author | SHA1 | Date | |
|---|---|---|---|
| b78d6a17a4 | |||
| 3487c805d4 |
27
scripts/train_chengdu_mahjong_model.py
Normal file
27
scripts/train_chengdu_mahjong_model.py
Normal file
@@ -0,0 +1,27 @@
|
||||
import gym
|
||||
from stable_baselines3 import PPO
|
||||
from src.environment.chengdu_majiang_env import MahjongEnv
|
||||
|
||||
def train_model(
    total_timesteps=100000,
    save_path="ppo_mahjong_model",
    tensorboard_log="./ppo_mahjong_tensorboard/",
):
    """Train a PPO agent on MahjongEnv, save it, then play one demo episode.

    Args:
        total_timesteps: Number of environment steps to train for.
        save_path: Path handed to ``model.save`` for the trained model.
        tensorboard_log: Directory where PPO writes its TensorBoard logs.

    The defaults reproduce the values that were previously hard-coded, so
    calling ``train_model()`` with no arguments behaves as before.
    """
    # Create the Mahjong environment instance.
    env = MahjongEnv()
    try:
        # Train the agent with the PPO algorithm.
        model = PPO(
            "MlpPolicy",
            env,
            verbose=1,
            tensorboard_log=tensorboard_log,
        )
        model.learn(total_timesteps=total_timesteps)

        # Persist the trained model.
        model.save(save_path)

        # Play one episode with the trained model.
        # NOTE(review): this assumes the classic gym API (reset() returns the
        # observation only, step() returns a 4-tuple) - confirm against
        # MahjongEnv and the installed gym / stable-baselines3 versions.
        obs = env.reset()
        done = False
        while not done:
            action, _ = model.predict(obs)  # let the trained model pick an action
            obs, _reward, done, _info = env.step(action)  # apply the action
            env.render()  # print the environment state
    finally:
        # Release the environment even if training or the rollout fails.
        env.close()


if __name__ == "__main__":
    train_model()
|
||||
@@ -1,5 +1,5 @@
|
||||
from loguru import logger
|
||||
from utils import get_tile_name
|
||||
from src.engine.utils import get_tile_name
|
||||
|
||||
|
||||
def draw_tile(self):
|
||||
|
||||
@@ -26,23 +26,30 @@ class MahjongEnv(gym.Env):
|
||||
|
||||
def step(self, action):
|
||||
"""
|
||||
每一步游戏,基于玩家的动作更新游戏状态。
|
||||
action:表示玩家的动作,可以是摸牌、打牌、碰牌等。
|
||||
执行玩家动作并更新游戏状态。
|
||||
|
||||
参数:
|
||||
- action: 玩家动作,0 代表摸牌,1 代表打牌,2 代表碰牌,3 代表杠牌
|
||||
|
||||
返回:
|
||||
- next_state: 当前玩家的手牌
|
||||
- reward: 奖励
|
||||
- done: 是否结束
|
||||
- info: 其他信息(如奖励历史等)
|
||||
"""
|
||||
done = False
|
||||
reward = 0
|
||||
|
||||
# 根据action类型选择执行的动作
|
||||
try:
|
||||
if action == 0: # 0代表摸牌
|
||||
reward, done = draw_tile(self.engine)
|
||||
reward, done = draw_tile(self.engine) # 调用摸牌函数
|
||||
elif action == 1: # 1代表打牌
|
||||
tile = self.engine.state.hands[self.engine.state.current_player][0] # 假设选择第一张牌
|
||||
discard_tile(self.engine, tile)
|
||||
discard_tile(self.engine, tile) # 调用打牌函数
|
||||
reward, done = -1, False
|
||||
elif action == 2: # 2代表碰牌
|
||||
tile = self.engine.state.hands[self.engine.state.current_player][0] # 假设选择第一张牌
|
||||
peng(self.engine, tile)
|
||||
peng(self.engine, tile) # 调用碰牌函数
|
||||
reward, done = 0, False
|
||||
elif action == 3: # 3代表杠牌
|
||||
tile = self.engine.state.hands[self.engine.state.current_player][0] # 假设选择第一张牌
|
||||
@@ -51,12 +58,12 @@ class MahjongEnv(gym.Env):
|
||||
|
||||
# 检查是否胡牌
|
||||
if self.engine.state.can_win(self.engine.state.hands[self.engine.state.current_player]):
|
||||
reward, done = self.handle_win()
|
||||
reward, done = self.handle_win() # 胡牌时处理胜利逻辑
|
||||
|
||||
# 检查游戏结束条件
|
||||
check_blood_battle(self.engine)
|
||||
|
||||
if self.engine.game_over:
|
||||
if self.engine.game_over: # 检查是否游戏结束
|
||||
done = True
|
||||
|
||||
except ValueError:
|
||||
@@ -101,10 +108,13 @@ class MahjongEnv(gym.Env):
|
||||
# 奖励设置为赢家得分
|
||||
reward = scores["winner"]
|
||||
self.engine.state.winners.append(winner) # 添加赢家到列表
|
||||
|
||||
# 如果有玩家分数 <= 0,可进行其他处理,如记录惩罚或结束游戏
|
||||
if any(score <= 0 for score in self.scores):
|
||||
self.engine.game_over = True # 设置游戏结束标志
|
||||
|
||||
return reward, True # 胡牌结束当前局
|
||||
|
||||
def render(self, mode="human"):
    """Print a short summary of the game state for debugging.

    Writes three lines to stdout: the current round number, the players'
    scores, and the hand of the player whose turn it is. ``mode`` is
    accepted but not used by this method.
    """
    print(f"当前轮数: {self.current_round}")
    print("玩家分数:", self.scores)
    state = self.engine.state
    active_hand = state.hands[state.current_player]
    print("当前玩家状态:", active_hand)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user