我一直试图用本文中的DQN来解决OpenAI月球着陆器的游戏。
https://arxiv.org/pdf/2006.04938v2.pdf
问题是训练50集需要12个小时,所以一定是出了问题。
import os
import random
import gym
import numpy as np
from collections import deque
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import Model
ENV_NAME = "LunarLander-v2"
DISCOUNT_FACTOR = 0.9
LEARNING_RATE = 0.001
MEMORY_SIZE = 2000
TRAIN_START = 1000
BATCH_SIZE = 24
EXPLORATION_MAX = 1.0
EXPLORATION_MIN = 0.01
EXPLORATION_DECAY = 0.99
class MyModel(Model):
def __init__(self, input_size, output_size):
super(MyModel, self).__init__()
self.d1 = Dense(128, input_shape=(input_size,), activation="relu")
self.d2 = Dense(128, activation="relu")
self.d3 = Dense(output_size, activation="linear")
def call(self, x):
x = self.d1(x)
x = self.d2(x)
return self.d3(x)
class DQNSolver():
def __init__(self, observation_space, action_space):
self.exploration_rate = EXPLORATION_MAX
self.action_space = action_space
self.memory = deque(maxlen=MEMORY_SIZE)
self.model = MyModel(observation_space,action_space)
self.model.compile(loss="mse", optimizer=Adam(lr=LEARNING_RATE))
def remember(self, state, action, reward, next_state, done):
self.memory.append((state, action, reward, next_state, done))
def act(self, state):
if np.random.rand() < self.exploration_rate:
return random.randrange(self.action_space)
q_values = self.model.predict(state)
return np.argmax(q_values[0])
def experience_replay(self):
if len(self.memory) < BATCH_SIZE:
return
batch = random.sample(self.memory, BATCH_SIZE)
state_batch, q_values_batch = [], []
for state, action, reward, state_next, terminal in batch:
# q-value prediction for a given state
q_values_cs = self.model.predict(state)
# target q-value
max_q_value_ns = np.amax(self.model.predict(state_next)[0])
# correction on the Q value for the action used
if terminal:
q_values_cs[0][action] = reward
else:
q_values_cs[0][action] = reward + DISCOUNT_FACTOR * max_q_value_ns
state_batch.append(state[0])
q_values_batch.append(q_values_cs[0])
# train the Q network
self.model.fit(np.array(state_batch),
np.array(q_values_batch),
batch_size = BATCH_SIZE,
epochs = 1, verbose = 0)
self.exploration_rate *= EXPLORATION_DECAY
self.exploration_rate = max(EXPLORATION_MIN, self.exploration_rate)
def lunar_lander():
env = gym.make(ENV_NAME)
observation_space = env.observation_space.shape[0]
action_space = env.action_space.n
dqn_solver = DQNSolver(observation_space, action_space)
episode = 0
print("Running")
while True:
episode += 1
state = env.reset()
state = np.reshape(state, [1, observation_space])
scores = []
score = 0
while True:
action = dqn_solver.act(state)
state_next, reward, terminal, _ = env.step(action)
state_next = np.reshape(state_next, [1, observation_space])
dqn_solver.remember(state, action, reward, state_next, terminal)
dqn_solver.experience_replay()
state = state_next
score += reward
if terminal:
print("Episode: " + str(episode) + ", exploration: " + str(dqn_solver.exploration_rate) + ", score: " + str(score))
scores.append(score)
break
if np.mean(scores[-min(100, len(scores)):]) >= 195:
print("Problem is solved in {} episodes.".format(episode))
break
env.close
if __name__ == "__main__":
lunar_lander()这是原木
root@b11438e3d3e8:~# /usr/bin/python3 /root/test.py
2021-01-03 13:42:38.055593: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcudart.so.10.1
2021-01-03 13:42:39.338231: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcuda.so.1
2021-01-03 13:42:39.368192: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:982] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2021-01-03 13:42:39.368693: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1716] Found device 0 with properties:
pciBusID: 0000:01:00.0 name: GeForce GTX 1080 computeCapability: 6.1
coreClock: 1.8095GHz coreCount: 20 deviceMemorySize: 7.92GiB deviceMemoryBandwidth: 298.32GiB/s
2021-01-03 13:42:39.368729: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcudart.so.10.1
2021-01-03 13:42:39.370269: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcublas.so.10
2021-01-03 13:42:39.371430: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcufft.so.10
2021-01-03 13:42:39.371704: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcurand.so.10
2021-01-03 13:42:39.373318: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcusolver.so.10
2021-01-03 13:42:39.374243: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcusparse.so.10
2021-01-03 13:42:39.377939: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcudnn.so.7
2021-01-03 13:42:39.378118: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:982] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2021-01-03 13:42:39.378702: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:982] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2021-01-03 13:42:39.379127: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1858] Adding visible gpu devices: 0
2021-01-03 13:42:39.386525: I tensorflow/core/platform/profile_utils/cpu_utils.cc:104] CPU Frequency: 3411185000 Hz
2021-01-03 13:42:39.386867: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x4fb44c0 initialized for platform Host (this does not guarantee that XLA will be used). Devices:
2021-01-03 13:42:39.386891: I tensorflow/compiler/xla/service/service.cc:176] StreamExecutor device (0): Host, Default Version
2021-01-03 13:42:39.498097: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:982] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2021-01-03 13:42:39.498786: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x4fdf030 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
2021-01-03 13:42:39.498814: I tensorflow/compiler/xla/service/service.cc:176] StreamExecutor device (0): GeForce GTX 1080, Compute Capability 6.1
2021-01-03 13:42:39.498987: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:982] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2021-01-03 13:42:39.499416: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1716] Found device 0 with properties:
pciBusID: 0000:01:00.0 name: GeForce GTX 1080 computeCapability: 6.1
coreClock: 1.8095GHz coreCount: 20 deviceMemorySize: 7.92GiB deviceMemoryBandwidth: 298.32GiB/s
2021-01-03 13:42:39.499448: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcudart.so.10.1
2021-01-03 13:42:39.499483: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcublas.so.10
2021-01-03 13:42:39.499504: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcufft.so.10
2021-01-03 13:42:39.499523: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcurand.so.10
2021-01-03 13:42:39.499543: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcusolver.so.10
2021-01-03 13:42:39.499562: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcusparse.so.10
2021-01-03 13:42:39.499581: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcudnn.so.7
2021-01-03 13:42:39.499643: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:982] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2021-01-03 13:42:39.500113: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:982] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2021-01-03 13:42:39.500730: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1858] Adding visible gpu devices: 0
2021-01-03 13:42:39.500772: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcudart.so.10.1
2021-01-03 13:42:39.915228: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1257] Device interconnect StreamExecutor with strength 1 edge matrix:
2021-01-03 13:42:39.915298: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1263] 0
2021-01-03 13:42:39.915322: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1276] 0: N
2021-01-03 13:42:39.915568: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:982] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2021-01-03 13:42:39.916104: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:982] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2021-01-03 13:42:39.916555: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1402] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 6668 MB memory) -> physical GPU (device: 0, name: GeForce GTX 1080, pci bus id: 0000:01:00.0, compute capability: 6.1)
Running
2021-01-03 13:42:40.267699: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcublas.so.10这是GPU的数据
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 450.66 Driver Version: 450.66 CUDA Version: 11.0 |
|-------------------------------+----------------------+----------------------+
| GPU Name Persistence-M| Bus-Id Disp.A | Volatile Uncorr. ECC |
| Fan Temp Perf Pwr:Usage/Cap| Memory-Usage | GPU-Util Compute M. |
| | | MIG M. |
|===============================+======================+======================|
| 0 GeForce GTX 1080 Off | 00000000:01:00.0 On | N/A |
| 0% 53C P2 46W / 198W | 7718MiB / 8111MiB | 0% Default |
| | | N/A |
+-------------------------------+----------------------+----------------------+正如你所看到的,TensorFlow不会在GPU上计算,而是保留内存,所以我假设这是因为神经网络的输入太小,它使用的是CPU。
为了确保GPU安装正确,我从他们的文档中运行了一个示例,它使用GPU。您也可以在google 这里上重现这个问题。
这是算法的问题还是代码的问题?
在这种情况下,有什么方法可以利用GPU吗?
更新
结果是代理商学会了飞行而不是学习降落,所以我增加了一个最大的150步来限制插播时间,但速度仍然很慢。
发布于 2021-01-22 23:50:18
https://stackoverflow.com/questions/65555792
复制相似问题