我在以交互方式训练图神经网络并对其运行梯度下降时遇到了问题。我的目标是:用图神经网络选择动作,用动作值计算损失,再用损失值进行梯度下降。然而,梯度下降这一步出了问题。
我已经创建了问题的独立版本,并显示了下面的代码,还复制了在执行过程中获得的错误消息。
class GIN0(Model):
    """Graph Isomorphism Network in the "GIN-0" configuration (epsilon fixed
    at 0), built from Spektral layers.

    Stacks ``n_layers`` GINConv layers, pools node features into one
    embedding per graph, and maps it through two ReLU dense layers.
    Because the last Dense layer also has ``channels`` units, the output
    width equals ``channels`` (the caller passes number_of_outputs here).
    """

    def __init__(self, channels, n_layers):
        super().__init__()
        # First GIN convolution; epsilon=0 selects the GIN-0 variant.
        self.conv1 = GINConv(channels, epsilon=0, mlp_hidden=[channels, channels])
        # Remaining n_layers - 1 convolutions. Keras tracks layers stored in
        # a plain list attribute, so their weights are registered with the
        # model's trainable_variables.
        self.convs = []
        for _ in range(1, n_layers):
            self.convs.append(
                GINConv(channels, epsilon=0, mlp_hidden=[channels, channels])
            )
        self.pool = GlobalAvgPool()
        self.dense1 = Dense(channels, activation="relu")
        self.dropout = Dropout(0.5)
        self.dense2 = Dense(channels, activation="relu")

    def call(self, inputs):
        """Forward pass.

        ``inputs`` is unpacked as ``(x, a, i)`` — node features, adjacency,
        and graph-membership indices; presumably the disjoint-batch format
        produced by Spektral's DisjointLoader (TODO confirm against caller).
        """
        x, a, i = inputs
        x = self.conv1([x, a])
        for conv in self.convs:
            x = conv([x, a])
        x = self.pool([x, i])
        x = self.dense1(x)
        x = self.dropout(x)
        return self.dense2(x)
class IGDQN(object):
    """Interactive graph DQN agent wrapping a GIN0 model.

    Parameters
    ----------
    number_of_outputs : int
        Size of the action space (also the GIN0 channel width).
    layers : int
        Number of GINConv layers in the model.
    alpha : float
        Learning rate for Adam.
    gamma : float
        Discount factor (kept for the full algorithm; unused in this snippet).
    epsilon : float
        Probability of taking a random (exploratory) action.
    """

    def __init__(self,
                 number_of_outputs,
                 layers,
                 alpha,
                 gamma,
                 epsilon
                 ):
        self.number_of_outputs = number_of_outputs
        self.layers = layers
        self.alpha = alpha
        self.gamma = gamma
        self.epsilon = epsilon
        # `learning_rate` is the current spelling; `lr` is a deprecated alias.
        self.opt = Adam(learning_rate=alpha)
        self.model = GIN0(number_of_outputs, layers)

    def choose_action(self, state, debug=False):
        """Epsilon-greedy action selection: random with probability epsilon,
        otherwise the argmin of the predicted Q-values."""
        if np.random.rand() < self.epsilon:
            return random.randrange(self.number_of_outputs)
        q = self.model.predict(state)
        if debug:
            print('q=', q)
            print('action_code=', np.argmin(q[0]))
        return np.argmin(q[0])

    @tf.function
    def update(self, loss):
        """Run one gradient-descent step on the GIN0 weights.

        Parameters
        ----------
        loss : callable or tf.Tensor
            Preferably a zero-argument callable that computes and returns
            the scalar loss. It is invoked INSIDE the GradientTape context
            below, which is essential: a loss tensor that was computed
            outside the tape is not connected to the model's variables, so
            ``tape.gradient`` returns None for every variable and
            ``apply_gradients`` raises "No gradients provided for any
            variable" (see the traceback reproduced in this file). A plain
            tensor is still accepted for backward compatibility, but it
            cannot yield usable gradients.

        Returns
        -------
        tf.Tensor
            The scalar loss value that was differentiated.
        """
        with tf.GradientTape() as tape:
            # Evaluate the loss while the tape is recording so that the
            # forward pass through self.model is captured on the tape.
            loss_value = loss() if callable(loss) else loss
        gradients = tape.gradient(loss_value, self.model.trainable_variables)
        self.opt.apply_gradients(zip(gradients, self.model.trainable_variables))
        return loss_value
def get_inputs():
    """Construct one 8-node example graph, wrap it in a CircuitDataset2,
    and pull a single disjoint-mode batch, returning only the model inputs
    (the target is discarded)."""
    # COO edge list of the 8-node adjacency; every edge has weight 1.
    edges = [
        [0, 1], [0, 2], [0, 4],
        [1, 0], [1, 2], [1, 3], [1, 5],
        [2, 0], [2, 1], [2, 3], [2, 4],
        [3, 1], [3, 2], [3, 7],
        [4, 0], [4, 2], [4, 5], [4, 6],
        [5, 1], [5, 4], [5, 6],
        [6, 4], [6, 5], [6, 7],
        [7, 3], [7, 6],
    ]
    weights = [1.0] * len(edges)
    sparse_adj = tf.sparse.SparseTensor(edges, weights, [8, 8])

    # Node feature matrix: 8 nodes x 7 features.
    node_features = np.array([
        [0., 0., 0., 1., 0., 6., 1.],
        [0., 0., 0., 1., 0., 7., 0.],
        [0., 0., 0., 1., 0., 1., 2.],
        [0., 0., 0., 1., 0., 1., 3.],
        [0., 0., 0., 1., 0., 6., 0.],
        [0., 0., 0., 1., 0., 7., 1.],
        [0., 0., 0., 1., 0., 0., 3.],
        [0., 0., 0., 1., 0., 0., 2.],
    ])

    dense_adj = tf.sparse.to_dense(sparse_adj)
    graph = Graph(x=node_features, a=dense_adj.numpy(), e=None, y=[456])

    design_name = PLconfig_grid.designName
    dataset = CircuitDataset2(
        design_name,
        [graph],
        False,
        path="/home/xx/CircuitAttributePrediction/dataset",
    )
    loader = DisjointLoader(dataset, batch_size=1)
    inputs, _ = next(loader)
    return inputs
def check_IGDQN(designName, inputDir):
    """Smoke-test driver: build an IGDQN agent, fetch one graph batch,
    choose an action, and run a single update step.

    NOTE(review): ``loss`` below is a constant tensor created OUTSIDE
    IGDQN.update's GradientTape, so it has no dependency on the model's
    variables — the traceback reproduced later in this file shows the
    resulting "No gradients provided for any variable" error.
    """
    # Agent hyper-parameters.
    number_of_outputs = 128
    layers = 3
    alpha = 5e-4
    gamma = 0.2
    epsilon = 0.3
    dqn = IGDQN(
        number_of_outputs,
        layers,
        alpha,
        gamma,
        epsilon
    )
    inputs = get_inputs()
    next_state = state = inputs
    action = dqn.choose_action(state)
    # loss calculation steps simplified for debug purposes
    loss = tf.constant(100, dtype=tf.float32)
dqn.update(loss)
在运行上面的代码时,我会得到以下错误:梯度函数对这个损失值返回了一串 None,随后导致权重更新过程报错。由于对图、神经网络和 spektral 库的依赖,我使用的是 eager(命令式)模式的 TensorFlow。
我不知道这里出了什么问题。我在回归中使用了图-神经网络的梯度下降,而且效果很好。
[None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None]
Traceback (most recent call last):
File "test_PLKerasNetworks_GIN0.py", line 142, in <module>
main()
File "test_PLKerasNetworks_GIN0.py", line 136, in main
check_IGDQN(designName, inputDir)
File "test_PLKerasNetworks_GIN0.py", line 130, in check_IGDQN
dqn.update(loss)
File "/home/xx/.local/share/virtualenvs/xx-TxBsk36Y/lib/python3.7/site-packages/tensorflow/python/eager/def_function.py", line 828, in __call__
result = self._call(*args, **kwds)
File "/home/xx/.local/share/virtualenvs/xx-TxBsk36Y/lib/python3.7/site-packages/tensorflow/python/eager/def_function.py", line 871, in _call
self._initialize(args, kwds, add_initializers_to=initializers)
File "/home/xx/.local/share/virtualenvs/xx-TxBsk36Y/lib/python3.7/site-packages/tensorflow/python/eager/def_function.py", line 726, in _initialize
*args, **kwds))
File "/home/xx/.local/share/virtualenvs/xx-TxBsk36Y/lib/python3.7/site-packages/tensorflow/python/eager/function.py", line 2969, in _get_concrete_function_internal_garbage_collected
graph_function, _ = self._maybe_define_function(args, kwargs)
File "/home/xx/.local/share/virtualenvs/xx-TxBsk36Y/lib/python3.7/site-packages/tensorflow/python/eager/function.py", line 3361, in _maybe_define_function
graph_function = self._create_graph_function(args, kwargs)
File "/home/xx/.local/share/virtualenvs/xx-TxBsk36Y/lib/python3.7/site-packages/tensorflow/python/eager/function.py", line 3206, in _create_graph_function
capture_by_value=self._capture_by_value),
File "/home/xx/.local/share/virtualenvs/xx-TxBsk36Y/lib/python3.7/site-packages/tensorflow/python/framework/func_graph.py", line 990, in func_graph_from_py_func
func_outputs = python_func(*func_args, **func_kwargs)
File "/home/xx/.local/share/virtualenvs/xx-TxBsk36Y/lib/python3.7/site-packages/tensorflow/python/eager/def_function.py", line 634, in wrapped_fn
out = weak_wrapped_fn().__wrapped__(*args, **kwds)
File "/home/xx/.local/share/virtualenvs/xx-TxBsk36Y/lib/python3.7/site-packages/tensorflow/python/eager/function.py", line 3887, in bound_method_wrapper
return wrapped_fn(*args, **kwargs)
File "/home/xx/.local/share/virtualenvs/xx-TxBsk36Y/lib/python3.7/site-packages/tensorflow/python/framework/func_graph.py", line 977, in wrapper
raise e.ag_error_metadata.to_exception(e)
ValueError: in user code:
test_PLKerasNetworks_GIN0.py:56 update *
self.opt.apply_gradients(zip(gradients, self.model.trainable_variables))
/home/xx/.local/share/virtualenvs/xx-TxBsk36Y/lib/python3.7/site-packages/tensorflow/python/keras/optimizer_v2/optimizer_v2.py:598 apply_gradients **
grads_and_vars = optimizer_utils.filter_empty_gradients(grads_and_vars)
/home/xx/.local/share/virtualenvs/xx-TxBsk36Y/lib/python3.7/site-packages/tensorflow/python/keras/optimizer_v2/utils.py:79 filter_empty_gradients
([v.name for _, v in grads_and_vars],))
ValueError: No gradients provided for any variable: ['dense/kernel:0', 'dense/bias:0', 'dense_1/kernel:0', 'dense_1/bias:0', 'dense_2/kernel:0', 'dense_3/kernel:0', 'dense_3/bias:0', 'dense_4/kernel:0', 'dense_4/bias:0', 'dense_5/kernel:0', 'dense_6/kernel:0', 'dense_6/bias:0', 'dense_7/kernel:0', 'dense_7/bias:0', 'dense_8/kernel:0', 'gi_n0/dense/kernel:0', 'gi_n0/dense/bias:0', 'gi_n0/dense_1/kernel:0', 'gi_n0/dense_1/bias:0'].
发布于 2022-09-28 06:14:33
上面的问题涉及对子类化 Model 的使用,以及用 tf.function 定义训练循环。tf.function 会为被装饰的函数构建计算图;要得到有效的计算图,损失必须在 update 函数内部、GradientTape 上下文之内计算。
原始代码在 update 函数之外计算损失,磁带(tape)没有记录从模型变量到损失的前向计算,TensorFlow 因此无法正确构建计算图,梯度计算的结果全为 None。
这是关于这个问题的TensorFlow文档:https://www.tensorflow.org/guide/keras/writing_a_training_loop_from_scratch
下面是更新函数tf.function装饰器内部梯度计算的一个例子,它运行得很好:
@tf.function
def update(self, state, next_state):
    """One DQN-style training step with the loss computed INSIDE the
    GradientTape context, so gradients flow back to the model's weights.

    Contrast with the broken version above: here both forward passes and
    the MSE loss are recorded on the tape, so ``tape.gradient`` returns
    real gradients instead of None.
    """
    loss = None
    with tf.GradientTape(
            watch_accessed_variables=True, persistent=False) as tape:
        # The GIN0 network weights are updated.
        v1 = self.model(state)
        v2 = self.model(next_state)
        # Greedy action from the next-state Q-values (argmin convention,
        # matching choose_action above).
        action = tf.math.argmin(v2[0])
        actions = tf.reshape([0, action], [1, 2])
        # Build the TD-style target by overwriting the chosen entry of v1
        # (target value simplified to a constant 4.0 for this example).
        v2_updated = tf.tensor_scatter_nd_update(v1, actions, tf.constant([4.0]))
        loss = self.mse(v1, v2_updated)
    # tape.gradient is called after the `with` block; persistent=False is
    # fine because the tape is consumed exactly once.
    gradients = tape.gradient(loss, self.model.trainable_variables)
    self.opt.apply_gradients(zip(gradients, self.model.trainable_variables))
    return loss, actions, v1, self.model.trainable_variables
来源:https://stackoverflow.com/questions/73265395
复制相似问题