我在 TensorFlow 2.0 中编写了一个自定义层,遇到了如下问题:
我想将一维权重数组(5x1)映射到二维数组(10x10)中。假设我有一个从一维到二维的索引表 weight_index_lst,如下所示:
weight_id, row, col
1,5,6
2,6,7
3,7,8
4,8,9
5,9,10
二维数组中其他位置的值都为 0。下面是我的自定义层的脚本。我的输入形状为 (10x1)。对于 w_mat,所有未被 self.w 赋值的位置都取 0。
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
class mylayer(layers.Layer):
    """Asker's original (non-working) attempt at a sparse-weight layer.

    The layer tries to copy the entries of a small trainable weight
    ``self.w`` into selected cells of an otherwise-zero matrix ``w_mat`` and
    then multiply the input by that matrix. It is kept as posted because it
    illustrates the problem being asked about: the per-element ``assign``
    into a ``tf.Variable`` yields no gradient for ``self.w``.
    """
    def __init__(self, weight_index_lst, **kwargs):
        super(mylayer, self).__init__(**kwargs)
        # Indexed below as weight_index_lst[i, 1] and [i, 2], so presumably a
        # 2-D array with columns (weight_id, row, col) -- TODO confirm.
        self.weight_index_lst= weight_index_lst
    def build(self):
        # NOTE(review): Keras normally invokes build(input_shape); this
        # signature accepts no input_shape argument -- confirm.
        self.w = self.add_weight(shape = (5,1),
                                 initializer = 'he_normal',
                                 trainable = True)
    def call(self, inputs):
        ct = 0
        # NOTE(review): `np` is not imported in this snippet, and the matrix
        # is 21x21 although the surrounding text describes a 10x10 target.
        # A fresh non-trainable variable is created on every call.
        w_mat = tf.Variable(np.zeros((21, 21)),dtype='float32',trainable=False)
        # NOTE(review): the loop runs 20 times while self.w has shape (5, 1),
        # so self.w[ct, 0] is indexed with ct up to 19 -- looks out of range.
        for i in range(20):
            i1 = self.weight_index_lst[i,1] #row index
            i2 = self.weight_index_lst[i,2] #column index
            w_mat[i1,i2].assign(self.w[ct,0]) #problem with no gradient provided
            #or w_mat[i1,i2] = self.w[ct,0] #resource variable cannot be assigned
            ct = ct+1
        # Left-multiply the input by the assembled matrix.
        y = tf.matmul(w_mat,inputs)
        return y
我也可以直接声明一个 (10x10) 的权重数组,但我的模型要求其余权重固定为 0,并且不参与训练。
发布于 2020-06-08 22:50:56
如果你确实想创建一个带有权重的新层,那么解决这个问题(梯度无法通过赋值操作传播)的方法,是把所有操作都改为符号张量操作——这样 TF 就能够传播梯度。一种做法是:创建一个由待训练权重组成的一维张量,在它前面拼接一个值为 0.0 的不可训练常量张量,然后使用 tf.gather 为矩阵的 n**2 个元素逐一选取所需的权重或常数 0,从而得到用来与层输入相乘的矩阵。由于所有操作都是符号张量操作,TF 可以毫无问题地传播梯度。这种方法的代码如下:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import numpy as np
class mylayer(layers.Layer):
    """Left-multiply the input by an ``n x n`` matrix with a few trainable cells.

    The matrix is zero everywhere except at the ``(row, col)`` positions listed
    in ``weight_index_lst``; each listed position holds its own trainable
    scalar. The matrix is assembled with ``tf.concat`` / ``tf.gather`` /
    ``tf.reshape`` only, so gradients reach the trainable weights.

    Args:
        n: Side length of the square weight matrix.
        weight_index_lst: Iterable of 0-based ``(row, col)`` pairs, one per
            trainable weight. If a pair is repeated, the last occurrence owns
            the cell and the earlier weight is never used.
        **kwargs: Forwarded to ``layers.Layer``.

    Raises:
        ValueError: If any pair lies outside ``[0, n)``. Without this check the
            flattened index ``row * n + col`` could silently address a
            different cell.
    """

    def __init__(self, n, weight_index_lst, **kwargs):
        super(mylayer, self).__init__(**kwargs)
        pairs = [(int(row), int(col)) for row, col in weight_index_lst]
        for row, col in pairs:
            if not (0 <= row < n and 0 <= col < n):
                raise ValueError(
                    "index (%d, %d) is outside a %dx%d matrix" % (row, col, n, n)
                )
        self.n = n
        self.weight_index_lst = pairs

        # selector[k] says which entry of [0, w_0, w_1, ...] fills flattened
        # cell k. Slot 0 is the constant zero, so trainable weights start at 1.
        # It depends only on the constructor arguments, so build it once here
        # instead of on every call.
        selector = np.zeros(n ** 2, dtype=np.int32)
        for slot, (row, col) in enumerate(pairs, start=1):
            selector[row * n + col] = slot
        self._selector = selector

    def build(self, input_shape):
        """Create one trainable scalar per listed ``(row, col)`` pair."""
        self.w = self.add_weight(
            shape=(len(self.weight_index_lst),),
            initializer='he_normal',
            trainable=True,
        )
        super(mylayer, self).build(input_shape)

    def call(self, inputs):
        """Return ``tf.matmul(W, inputs)`` for the sparse ``n x n`` matrix ``W``."""
        # Prepend a non-trainable zero so that selector value 0 means
        # "this cell is fixed at 0".
        zero = tf.zeros([1], dtype=self.w.dtype)
        zero_and_weights = tf.concat([zero, self.w], axis=0)
        w_flattened = tf.gather(
            zero_and_weights, tf.constant(self._selector, dtype=tf.int32)
        )
        w_matrix = tf.reshape(w_flattened, (self.n, self.n))
        return tf.matmul(w_matrix, inputs)

    def get_config(self):
        """Return the constructor arguments so the layer can be re-created."""
        config = super(mylayer, self).get_config()
        config.update({
            'n': self.n,
            'weight_index_lst': [list(pair) for pair in self.weight_index_lst],
        })
        return config
# Demo model: map 45 input features to 21 * 21 values, view them as a 21x21
# matrix, then pass that matrix through the sparse-weight layer.
m = tf.keras.Sequential()
m.add(layers.Dense(21 ** 2, input_shape=(45,)))
m.add(layers.Reshape(target_shape=(21, 21)))
m.add(mylayer(21, [(4, 5), (5, 6), (6, 7), (7, 8), (8, 9)]))
m.summary()
发布于 2020-06-08 06:15:28
你不需要为此创建一个可训练的层。考虑使用不可训练的lambda层:
def select_as_needed(x, wrc, n):
    """Scatter selected columns of ``x`` into a flattened ``n * n`` layout.

    Args:
        x: Tensor that is gathered along axis 1; presumably shaped
            ``(batch, num_values)`` -- TODO confirm against the preceding layer.
        wrc: Iterable of 0-based ``(w, r, c)`` triples: the value at index ``w``
            of axis 1 is placed at flattened position ``r * n + c``.
        n: Side length of the target square matrix.

    Returns:
        ``x`` gathered to ``n * n`` entries along axis 1, with every position
        not named in ``wrc`` set to zero.
    """
    # For each flattened cell, the index of the input element to copy there.
    selector = np.zeros(n * n, dtype=np.int32)
    # 1.0 where a value was placed, 0.0 elsewhere.
    mask = np.zeros(n * n, dtype=np.float32)
    for w, r, c in wrc:
        flat = r * n + c
        selector[flat] = w
        mask[flat] = 1.0
    t_ind = tf.constant(selector, dtype=tf.int32)
    t_mask = tf.constant(mask, dtype=tf.float32)
    gathered = tf.gather(x, t_ind, axis=1)
    # Unselected cells default to selector value 0 and would otherwise receive
    # a copy of input element 0; the mask zeroes them out. The mask is cast so
    # the multiply also works when x is not float32.
    return gathered * tf.cast(t_mask, gathered.dtype)
# (weight_id, row, col) triples: the question's table converted to 0-based indices.
wrc = [(0, 4, 5), (1, 5, 6), (2, 6, 7), (3, 7, 8), (4, 8, 9)]
n = 10
model = tf.keras.models.Sequential([
    # ... your stuff
    # Produces 5 outputs per sample; replace with whatever else yields 5 values.
    tf.keras.layers.Dense(5, 'linear'),
    # Non-trainable step: place the 5 values into a flattened n*n vector.
    tf.keras.layers.Lambda(select_as_needed, arguments={'wrc': wrc, 'n': n}),
    tf.keras.layers.Reshape(target_shape=(n, n)),
])
# Source: https://stackoverflow.com/questions/62251962
复制相似问题