我正在训练一个神经网络来计算3x3矩阵的逆。我使用的是一个Keras全连接(Dense)模型,只有1个隐藏层、9个神经元。隐藏层的激活函数是'relu',输出层是线性的。我使用了10000个行列式为1的矩阵作为训练数据。得到的结果不太好(均方根误差RMSE高达数百)。我尝试过更多的层、更多的神经元以及其他激活函数,但改进非常小。代码如下:
import numpy as np
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense
def generator(nb_samples, matrix_size=2, entries_range=(0, 1), determinant=None):
    '''
    Generate random square matrices together with a regression target.

    Every matrix is drawn entry-wise with np.random.uniform over
    entries_range and returned flattened to length matrix_size**2.

    Parameters
    ----------
    nb_samples : int
        Number of matrices to generate.
    matrix_size : int
        Side length of each square matrix.
    entries_range : sequence
        entries_range[0] and entries_range[1] are the low/high bounds
        handed to np.random.uniform.
    determinant : number or None
        None: matrices are left as drawn and the target is their determinant.
        Non-zero number: the first row of every matrix is rescaled so that
        the matrix has exactly this determinant, and the target is the
        flattened inverse. Because of that rescaling the first row is
        generally no longer inside entries_range.

    Returns
    -------
    (matrices, targets) : tuple of np.ndarray
        matrices has one flattened matrix per row. targets holds, per row,
        either the determinant (one value) or the flattened inverse
        (matrix_size**2 values), depending on `determinant`.

    Raises
    ------
    ValueError
        If determinant is 0: such a matrix is singular and has no inverse.
    '''
    want_inverses = determinant is not None
    if want_inverses and determinant == 0:
        raise ValueError(
            'determinant must be non-zero: a singular matrix has no inverse'
        )

    flat_size = matrix_size**2
    matrices = []
    targets = []
    for _ in range(nb_samples):
        matrix = np.random.uniform(
            entries_range[0], entries_range[1], (matrix_size, matrix_size)
        )
        if want_inverses:
            # Scaling a single row by k scales the determinant by k, so this
            # forces det(matrix) == determinant.
            matrix[0] *= determinant / np.linalg.det(matrix)
            target = np.linalg.inv(matrix).reshape(flat_size)
        else:
            target = np.reshape(np.linalg.det(matrix), (1,))
        matrices.append(matrix.reshape(flat_size))
        targets.append(target)
    return np.array(matrices), np.array(targets)
### Select number of samples, matrix size and range of entries in matrices
nb_samples = 10000
matrix_size = 3
entries_range = (0, 100)
determinant = 1

### Generate random matrices (network inputs) and their inverses (targets),
### both flattened to vectors of length matrix_size**2
matrices, inverses = generator(
    nb_samples,
    matrix_size=matrix_size,
    entries_range=entries_range,
    determinant=determinant,
)

### Select number of hidden layers and neurons per hidden layer
nb_hidden_layers = 1
nb_neurons = matrix_size**2
activation = 'relu'

### Create dense neural network with exactly nb_hidden_layers hidden layers
### having nb_neurons neurons each, followed by a linear output layer.
### (Previously one extra hidden layer was always added in front of the loop,
### so the network was one layer deeper than nb_hidden_layers reported.)
flat_size = matrix_size**2
model = Sequential()
# Only the first layer added to the model declares the input width; this also
# covers nb_hidden_layers == 0, where the output layer is the first layer.
first_layer_kwargs = {'input_dim': flat_size}
for _ in range(nb_hidden_layers):
    model.add(Dense(nb_neurons, activation=activation, **first_layer_kwargs))
    first_layer_kwargs = {}
model.add(Dense(flat_size, **first_layer_kwargs))
model.compile(loss='mse', optimizer='adam')

### Train the model, holding out 33% of the samples for validation
history = model.fit(
    matrices,
    inverses,
    epochs=400,
    batch_size=100,
    verbose=0,
    validation_split=0.33,
)

### Get validation RMSE from the last epoch's validation MSE in 'history'
rmse = np.sqrt(history.history['val_loss'][-1])
### Print RMSE and parameter values
print('''
Validation RMSE: {}
Number of hidden layers: {}
Number of neurons: {}
Number of samples: {}
Matrices size: {}
Range of entries: {}
Determinant: {}
'''.format(rmse,nb_hidden_layers,nb_neurons,nb_samples,matrix_size,entries_range,determinant))

我在网上查过,似乎有一些关于逆矩阵近似问题的论文。不过,在更换模型之前,我想知道是否还有其他可以调整的参数,能对误差产生更大的影响。希望有人能提供一些见解。谢谢。
发布于 2021-01-09 13:18:47
对神经网络来说,求3x3矩阵的逆相当困难,因为神经网络往往不擅长对激活值做乘法或除法。我没能用简单的全连接(Dense)网络做到这一点,但一个7层的ResNet可以做到。它有数百万个权重,所以需要的样本远不止10000个:我发现它会完全记住多达100,000个样本,即使有10,000,000个样本也会严重过拟合,所以我改为持续生成样本,每个样本在生成后只送入网络训练一次。
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
#too_small_model = tf.keras.Sequential([
# tf.keras.layers.Flatten(),
# tf.keras.layers.Dense(1500, activation="relu"),
# tf.keras.layers.Dense(1500, activation="relu"),
# tf.keras.layers.Dense(N * N),
# tf.keras.layers.Reshape([ N, N])
#])
# Side length of the square matrices the network learns to invert.
N = 3

# --- Model: a stack of 7 residual blocks over a 128-wide feature vector ---
inp = tf.keras.layers.Input(shape=[N, N])
x = tf.keras.layers.Flatten()(inp)
x = tf.keras.layers.Dense(128, activation="relu")(x)
for _block in range(7):
    skip = x
    # Densely connected inner layers: each new 256-unit layer sees the
    # concatenation of the block input and all previous inner outputs.
    for _inner in range(4):
        y = tf.keras.layers.Dense(256, activation="relu")(x)
        x = tf.keras.layers.concatenate([x, y])
    #x = tf.keras.layers.BatchNormalization()(x)
    # Project back to the skip width. Kernel and bias start at zero, so each
    # block initially contributes nothing and the sum below equals `skip`.
    x = tf.keras.layers.Dense(
        128,
        kernel_initializer=tf.keras.initializers.Zeros(),
        bias_initializer=tf.keras.initializers.Zeros()
    )(x)
    x = skip + x
    #x = tf.keras.layers.BatchNormalization()(x)
x = tf.keras.layers.Dense(N * N)(x)
x = tf.keras.layers.Reshape([N, N])(x)
model2 = tf.keras.models.Model(inp, x)
model2.compile(loss="mean_squared_error", optimizer=tf.keras.optimizers.Adam(learning_rate=.00001))

# --- Training: draw a fresh batch of matrices each round, one epoch each ---
for _round in range(5000):
    # Entries uniform in [-2, 2).
    random_matrices = np.random.random((1000000, N, N)) * 4 - 2
    # Drop near-singular matrices (|det| <= 0.1).
    random_matrices = random_matrices[np.abs(np.linalg.det(random_matrices)) > .1]
    inverses = np.linalg.inv(random_matrices)
    inverses = inverses / 5. # normalize target values, large target values hamper training
    model2.fit(random_matrices, inverses, epochs=1, batch_size=1024)

# --- Evaluation on the first 10000 matrices of the last generated batch ---
# Predictions are in the scaled target space (true inverse divided by 5).
zz = model2.predict(random_matrices[:10000])
plt.scatter(inverses[:10000], zz, s=.0001)
print(random_matrices[76] @ zz[76] * 5)

https://stackoverflow.com/questions/65228352
复制相似问题