1
1
This commit is contained in:
@@ -7,7 +7,7 @@ def train_model():
|
||||
env = MahjongEnv()
|
||||
|
||||
# 使用 PPO 算法训练模型
|
||||
model = PPO("MlpPolicy", env, verbose=1, tensorboard_log="./ppo_mahjong_tensorboard/")
|
||||
model = PPO("MlpPolicy", env, verbose=1, tensorboard_log="../logs/ppo_mahjong_tensorboard/")
|
||||
|
||||
# 训练模型,训练总步数为100000
|
||||
model.learn(total_timesteps=100000)
|
||||
|
||||
Reference in New Issue
Block a user