我编写了一个DQN脚本来玩BreakoutDeterministic,并在我学校的GPU服务器上运行它。但是,代码似乎占用了总内存的97% (超过100 GB)!
我想知道脚本的哪一部分需要如此高的内存使用量?我在3个episode上使用了内存分析器,在我的笔记本电脑上,内存需求似乎每一步都线性增加。
我用PyCharm (Python 3.6)编写了脚本。我的笔记本电脑没有GPU,只有12 GB内存,而学校服务器使用的是Ubuntu和P100 GPU。
import gym
import numpy as np
import random
from collections import deque
from keras.layers import Dense, Input, Lambda, convolutional, core
from keras.models import Model
from keras.optimizers import Adam
import matplotlib.pyplot as plt
import os
import time as dt
plt.switch_backend('agg')
def preprocess(state):
    """Convert an RGB Atari frame to a half-resolution grayscale frame.

    Averages the colour channels (cast to uint8 to keep the replay
    buffer small), keeps every second row and column, and restores a
    trailing channel axis so the result has shape (H/2, W/2, 1).
    """
    gray = state.mean(axis=2).astype(np.uint8)
    downsampled = gray[::2, ::2]
    return downsampled[:, :, np.newaxis]
class DQNAgent:
    """DQN agent with an online and a (soft-updated) target network.

    Memory note (the reported 97% RAM usage): ``self.memory`` holds up to
    one million transitions, each containing two preprocessed frames.
    Frames are stored as uint8 (see ``preprocess``), but at (105, 80, 1)
    per frame the full buffer is still on the order of tens of GB —
    shrinking ``maxlen`` is the first thing to try when RAM runs out.
    """

    def __init__(self, env):
        self.env = env
        self.action_size = env.action_space.n
        self.state_size = self.select_state_size()
        # Replay buffer; reduce maxlen if memory is tight.
        self.memory = deque(maxlen=1000000)
        self.gamma = 0.99     # discount factor
        self.eps = 1.0        # epsilon-greedy exploration rate
        self.eps_min = 0.01
        # NOTE(review): decay is applied on every act() call (per step,
        # not per episode), so eps hits eps_min within ~90 steps —
        # probably faster than intended; confirm the schedule.
        self.decay = 0.95
        self.lr = 0.00025
        self.start_life = 5   # Breakout starts with 5 lives
        self.tau = 0.125      # soft-update rate for the target network
        self.model = self.create_cnnmodel()
        self.target_model = self.create_cnnmodel()

    def select_state_size(self):
        """Return the shape of one preprocessed observation."""
        return preprocess(self.env.reset()).shape

    def create_cnnmodel(self):
        """Build the DeepMind-style conv net mapping a frame to Q-values.

        Bug fix: the input previously declared ``dtype='int32'`` while the
        Lambda divided by 255 — integer division would zero out the whole
        input on backends that keep the int dtype.  The input now uses the
        default float dtype and divides by 255.0 explicitly.
        """
        data_input = Input(shape=self.state_size, name='data_input')
        # Scale raw pixel intensities [0, 255] into [0, 1].
        normalized = Lambda(lambda x: x / 255.0)(data_input)
        conv1 = convolutional.Convolution2D(32, 8, strides=(4, 4), activation='relu')(normalized)
        conv2 = convolutional.Convolution2D(64, 4, strides=(2, 2), activation='relu')(conv1)
        conv3 = convolutional.Convolution2D(64, 3, strides=(1, 1), activation='relu')(conv2)
        conv_flatten = core.Flatten()(conv3)  # flatten conv features for the dense head
        h4 = Dense(512, activation='relu')(conv_flatten)
        prediction_output = Dense(self.action_size, name='prediction_output',
                                  activation='linear')(h4)
        model = Model(inputs=data_input, outputs=prediction_output)
        model.compile(optimizer=Adam(lr=self.lr), loss='mean_squared_error')
        return model

    def remember(self, state, action, reward, new_state, done):
        """Append one (s, a, r, s', done) transition to the replay buffer."""
        self.memory.append([state, action, reward, new_state, done])

    def replay(self, batch_size):
        """Sample a minibatch and fit the online network on TD targets.

        Returns the Keras ``History`` from ``fit``, or ``None`` while the
        buffer holds fewer than ``batch_size`` transitions.

        Performance fix: predictions are now batched — two ``predict``
        calls per replay instead of 2 * batch_size single-row calls,
        which dominated the runtime of the original implementation.
        """
        if batch_size > len(self.memory):
            return None
        samples = random.sample(self.memory, batch_size)
        states = np.vstack([s[0] for s in samples])
        new_states = np.vstack([s[3] for s in samples])
        targets = self.target_model.predict(states)
        next_q = self.target_model.predict(new_states)
        for i, (_, action, reward, _, done) in enumerate(samples):
            if done:
                targets[i][action] = reward
            else:
                targets[i][action] = reward + self.gamma * np.max(next_q[i])
        return self.model.fit(states, targets, epochs=1, verbose=0)

    def act(self, state):
        """Epsilon-greedy action selection; decays epsilon on every call."""
        self.eps = max(self.eps_min, self.eps * self.decay)
        if np.random.random() < self.eps:
            return self.env.action_space.sample()
        return np.argmax(self.model.predict(state)[0])

    def train_target(self):
        """Soft-update target weights: tau * online + (1 - tau) * target."""
        weights = self.model.get_weights()
        target_weights = self.target_model.get_weights()
        for i in range(len(target_weights)):
            target_weights[i] = (1 - self.tau) * target_weights[i] + self.tau * weights[i]
        self.target_model.set_weights(target_weights)
def main(episodes):
    """Train a DQN agent on BreakoutDeterministic-v4 for ``episodes`` episodes.

    Returns a tuple of (total reward per episode, mean loss per episode,
    per-step reward lists, per-step loss lists, total frames processed).

    Fixes vs. the original: ``DQNAgent`` is constructed with only ``env``
    (the stray ``cnn`` argument was undefined); ``rend_env`` is now defined
    and guards only ``env.render()`` (it previously guarded the action
    selection, leaving ``action`` unbound when rendering was off); the
    final time print used a broken ``$d`` specifier with a misplaced
    argument; model checkpoints are written once per episode instead of
    every 20 frames (heavy disk I/O inside the hot loop).
    """
    env = gym.make('BreakoutDeterministic-v4')
    agent = DQNAgent(env)
    time = env._max_episode_steps  # step cap per episode
    batch_size = 32
    save_model = 'y'
    rend_env = 'n'  # set to 'y' to render every frame (slow)
    filepath = os.getcwd()
    date = dt.strftime('%d%m%Y')
    clock = dt.strftime('%H.%M.%S')
    print('++ Training started on {} at {} ++'.format(date, clock))
    start_time = dt.time()
    tot_r = []
    tot_loss = []
    it_r = []
    it_loss = []
    tot_frames = 0
    for e in range(episodes):
        r = []
        loss = []
        state = preprocess(env.reset())[None, :]  # add batch axis
        current_life = agent.start_life
        for t in range(time):
            if rend_env == 'y':
                env.render()
            action = agent.act(state)
            new_state, reward, terminal_life, life = env.step(action)
            new_state = preprocess(new_state)[None, :]
            if life['ale.lives'] < current_life:
                reward = -1  # punish losing a life
                current_life = life['ale.lives']
            agent.remember(state, action, reward, new_state, terminal_life)
            hist = agent.replay(batch_size)
            agent.train_target()
            state = new_state
            r.append(reward)
            tot_frames += 1
            # replay() returns None until the buffer reaches batch_size
            loss.append(0.0 if hist is None else hist.history['loss'][0])
            if t % 20 == 0:
                print('Frame : {}, Cum Reward = {}, Avg Loss = {}, Curr Life: {}'.format(
                    t, np.sum(r), round(np.mean(loss[-20:-1]), 3), current_life))
            if current_life == 0 or terminal_life:
                print('Episode {} of {}, Cum Reward = {}, Avg Loss = {}'.format(
                    e, episodes, np.sum(r), np.mean(loss)))
                break
        # Checkpoint once per episode (was every 20 frames).
        agent.model.save('{}/Mod_Fig/DQN_BO_model_{}.h5'.format(filepath, date))
        agent.model.save_weights('{}/Mod_Fig/DQN_BO_weights_{}.h5'.format(filepath, date))
        tot_r.append(np.sum(r))
        tot_loss.append(np.mean(loss))
        it_r.append(r)
        it_loss.append(loss)
    # Recompute the timestamps: the originals were captured at start-up.
    print('Training ended on {} at {}'.format(dt.strftime('%d%m%Y'),
                                              dt.strftime('%H.%M.%S')))
    run_time = dt.time() - start_time
    hrs, rem = divmod(int(run_time), 3600)
    mins, secs = divmod(rem, 60)
    print('Total Training time: %d Hrs %d Mins %d s' % (hrs, mins, secs))
    if save_model == 'y':
        agent.model.save('{}/Mod_Fig/DQN_BO_finalmodel_{}_{}.h5'.format(filepath, date, clock))
        agent.model.save_weights('{}/Mod_Fig/DQN_BO_finalweights_{}_{}.h5'.format(filepath, date, clock))
        agent.model.summary()
    return tot_r, tot_loss, it_r, it_loss, tot_frames
if __name__ == '__main__':
    episodes = 3
    total_reward, total_loss, rewards_iter, loss_iter, frames_epi = main(episodes=episodes)

将非常感谢您对编写内存高效且快速的深度强化学习代码的评论和帮助!我希望训练我的DQN玩Breakout 5000集,但远程服务器最多只允许48小时的训练。提前感谢!
发布于 2019-01-26 09:01:25
听起来你好像有内存泄漏。
这条线
agent.remember(state, action, reward, new_state, terminal_life)会被调用5000 * env._max_episode_steps次,而每个state都是一个(210, 160, 3)数组。首先要尝试的是缩小self.memory = deque(maxlen=1000000) # specify memory size的大小,以验证这是否是唯一的原因。
如果您真的认为您需要这么大的容量,那么您应该将self.memory转储到磁盘中,并在内存中保留一个小的子示例。
另外,:来自deque的子采样非常慢,deque是作为链表实现的,因此每个子样本都是O(N*M)。您应该考虑为self.memory实现自己的环形缓冲区。
Alternatively: --您可能会考虑一个概率缓冲区(我不知道正确的名称),每次添加到完整缓冲区时,随机删除一个元素并追加新元素。这意味着任何(状态,行动,奖励,.)遇到的元组在缓冲区中包含的概率不是零,最近的元组比旧的元组更有可能被包含。
发布于 2019-01-30 03:41:46
我的记忆力也有类似的问题,现在仍然如此。
内存消耗大的主要原因是states。但我做了些什么让事情变得更好
步骤1:使用OpenCV将它们调整为更小的尺寸,例如84 x 84 (有些人先缩放到110 x 84,再裁剪到84 x 84)。这将导致每个state都具有形状(84, 84, 3)。
步骤2:将这些帧转换为灰度(基本上是黑白的)。这应该会将形状更改为(84,84,1)。
步骤3:使用dtype=np.uint8存储states。它们消耗最少的内存,并且非常适合于像素强度值范围内的0-255。
附加信息
我在免费的Google Colab笔记本(K80 Tesla GPU和13 GB内存)上运行我的代码,并定期将重放缓冲区保存到我的Google Drive上。
对于步骤1和步骤2,考虑使用OpenAI Baselines提供的Atari包装器(wrappers),因为没有必要重新发明轮子。
您还可以使用这个片段来检查您自己的程序在每一步中使用的RAM数量,就像我所做的那样:
import os
import psutil

def show_RAM_usage(self):
    py = psutil.Process(os.getpid())
    print('RAM usage: {} GB'.format(py.memory_info()[0]/2. ** 30))

这个片段是从我自己原始答案中的程序修改而来的。
https://stackoverflow.com/questions/54360708
复制相似问题