I read a few tutorials on neural-network backpropagation and decided to implement one from scratch. For the past few days I have been trying to find the bug in my code, without success.
I followed the tutorials hoping to build a sine-function approximator. It is a simple network: 1 input neuron, 10 hidden neurons, and 1 output neuron. The activation function is a sigmoid on the second layer. Exactly the same model works easily in TensorFlow.
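In equation form, the forward pass I am aiming for (matching the code below) is

$$\hat{y} \;=\; \sum_{j=1}^{10} w^{(2)}_j\,\sigma\!\big(w^{(1)}_j\,x\big), \qquad \sigma(z) = \frac{1}{1+e^{-z}},$$

with the loss $E = \tfrac{1}{2}(\hat{y} - y)^2$.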
import numpy as np
from math import sin

def sigmoid(x):
    return 1 / (1 + np.math.e ** -x)

def sigmoid_deriv(x):
    return sigmoid(x) * (1 - sigmoid(x))

x_data = np.random.rand(500) * 15.0
y_data = [sin(x) for x in x_data]

ETA = .01

layer1 = 0
layer1_weights = np.random.rand(10) * 2. - 1.
layer2 = np.zeros(10)
layer2_weights = np.random.rand(10) * 2. - 1.
layer3 = 0

for loop_iter in range(500000):
    # data init
    index = np.random.randint(0, 500)
    x = x_data[index]
    y = y_data[index]

    # forward propagation
    # layer 1
    layer1 = x
    # layer 2
    layer2 = layer1_weights * layer1
    # layer 3
    layer3 = sum(sigmoid(layer2) * layer2_weights)

    # error
    error = .5 * (layer3 - y) ** 2  # L2 loss

    # backpropagation
    # error_wrt_layer3 * layer3_wrt_weights_layer2
    error_wrt_layer2_weights = (y - layer3) * sigmoid(layer2)
    # error_wrt_layer3 * layer3_wrt_out_layer2 * out_layer2_wrt_in_layer2 * in_layer2_wrt_weights_layer1
    error_wrt_layer1_weights = (y - layer3) * layer2_weights * sigmoid_deriv(sigmoid(layer2)) * layer1

    # update the weights
    layer2_weights -= ETA * error_wrt_layer2_weights
    layer1_weights -= ETA * error_wrt_layer1_weights

    if loop_iter % 10000 == 0:
        print(error)

The unexpected behavior is simply that the network does not converge. Please review my error_wrt_... derivatives; the problem must be somewhere in there.
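For reference, the chain-rule factorization I am trying to implement (this just spells out the comments in the code, with $z_j = w^{(1)}_j x$, $a_j = \sigma(z_j)$ and $\hat{y} = \sum_j w^{(2)}_j a_j$) is

$$\frac{\partial E}{\partial w^{(2)}_j} = \frac{\partial E}{\partial \hat{y}}\cdot\frac{\partial \hat{y}}{\partial w^{(2)}_j}, \qquad
\frac{\partial E}{\partial w^{(1)}_j} = \frac{\partial E}{\partial \hat{y}}\cdot\frac{\partial \hat{y}}{\partial a_j}\cdot\frac{\partial a_j}{\partial z_j}\cdot\frac{\partial z_j}{\partial w^{(1)}_j}.$$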
Here is the TensorFlow code, which works perfectly:
import numpy as np
import tensorflow as tf
from math import sin

x_data = np.array(np.random.rand(500)).reshape(500, 1)
y_data = np.array([sin(x) for x in x_data]).reshape(500, 1)

x = tf.placeholder(tf.float32, shape=[None, 1])
y_true = tf.placeholder(tf.float32, shape=[None, 1])

W = tf.Variable(tf.random_uniform([1, 10], -1.0, 1.0))
hidden1 = tf.nn.sigmoid(tf.matmul(x, W))

W_hidden = tf.Variable(tf.random_uniform([10, 1], -1.0, 1.0))
output = tf.matmul(hidden1, W_hidden)

loss = tf.square(output - y_true) / 2.

optimizer = tf.train.GradientDescentOptimizer(.01)
train = optimizer.minimize(loss)

init = tf.initialize_all_variables()

sess = tf.Session()
sess.run(init)

for i in range(500000):
    rand_index = np.random.randint(0, 500)
    _, error = sess.run([train, loss], feed_dict={x: [x_data[rand_index]],
                                                  y_true: [y_data[rand_index]]})
    if i % 10000 == 0:
        print(error)

sess.close()

Posted on 2016-11-07 14:37:35
I think your biggest problem is the lack of biases. Between the input layer and the hidden layer you should not only transform by the weights, but also add a bias. This bias shifts your sigmoid function to the left or to the right. Take a look at this code (I made a few modifications).
The important part:
np.random.rand(width) * 15. - 7.5 gives good random starting points, so that all the biases are random points on the desired x scale. Let me know if anything is unclear.
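To see the effect, here is a tiny standalone illustration (the numbers are only for demonstration and are not part of the fix) of how a bias shifts where the sigmoid turns on along x:

import numpy as np

def sigmoid(x):
    return 1 / (1 + np.e ** -x)

xs = np.array([-6., -3., 0., 3., 6.])
print(sigmoid(xs))        # transition centered at x = 0
print(sigmoid(xs - 3.))   # same curve, shifted 3 units to the right by a bias of -3

Here is the full modified code: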
import numpy as np
import matplotlib.pyplot as plt

def sigmoid(x):
    return 1 / (1 + np.math.e ** -x)

def sigmoid_deriv(x):
    return sigmoid(x) * (1 - sigmoid(x))

def guess(x):
    layer1 = x
    z_2 = layer1_weights * layer1 + layer1_biases
    a_2 = sigmoid(z_2)
    z_3 = np.dot(a_2, layer2_weights) + layer2_biases
    # a_3 = sigmoid(z_3)
    a_3 = z_3
    return a_3

x_data = np.random.rand(500) * 15.0 - 7.5
y_data = [np.sin(x) for x in x_data]

ETA = 0.05
width = 10

layer1_weights = np.random.rand(width) * 2. - 1.
layer1_biases = np.random.rand(width) * 15. - 7.5

layer2_weights = np.random.rand(width) * 2. - 1.
layer2_biases = np.random.rand(1) * 2. - 1.

error_all = []

# plot the initial (untrained) fit against the data
x_all = x_data
y_all = [guess(x_i) for x_i in x_all]
plt.plot(x_all, y_all, '.')
plt.plot(x_data, y_data, '.')
plt.show()

epochs = 500000
for loop_iter in range(epochs):
    # data init
    index = np.random.randint(0, 500)
    x = x_data[index]
    y = y_data[index]

    # forward propagation
    # layer 1
    layer1 = x
    # layer 2
    # TODO: add the sigmoid function here
    z_2 = layer1_weights * layer1 + layer1_biases
    a_2 = sigmoid(z_2)
    # layer 3
    # TODO: remove the sigmoid here (not that it really matters, but the values at each layer are after the sigmoid)
    z_3 = np.dot(a_2, layer2_weights) + layer2_biases
    # a_3 = sigmoid(z_3)
    a_3 = z_3

    # error
    error = .5 * (a_3 - y) ** 2  # L2 loss

    # backpropagation
    # error_wrt_layer3 * layer3_wrt_weights_layer2
    # error_wrt_layer2_weights = (y - layer3) * sigmoid(layer2)
    delta = (a_3 - y)
    error_wrt_layer2_weights = delta * a_2
    error_wrt_layer2_biases = delta

    # error_wrt_layer3 * layer3_wrt_out_layer2 * out_layer2_wrt_in_layer2 * in_layer2_wrt_weights_layer1
    # error_wrt_layer1_weights = (y - layer3) * layer2_weights * sigmoid_deriv(sigmoid(layer2)) * layer1
    error_wrt_layer1_weights = delta * np.dot(sigmoid_deriv(z_2), layer2_weights) * layer1
    error_wrt_layer1_biases = delta * np.dot(sigmoid_deriv(z_2), layer2_weights)

    # update the weights
    layer2_weights -= ETA * error_wrt_layer2_weights
    layer1_weights -= ETA * error_wrt_layer1_weights
    layer2_biases -= ETA * error_wrt_layer2_biases
    layer1_biases -= ETA * error_wrt_layer1_biases

    error_all.append(error)
    if loop_iter % 10000 == 0:
        print(error)

# plt.plot(error_all)
# plt.show()

# plot the trained fit against the data
x_all = x_data
y_all = [guess(x_i) for x_i in x_all]
plt.plot(x_all, y_all, '.')
plt.plot(x_data, y_data, '.')
plt.show()

https://datascience.stackexchange.com/questions/14978
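Independently of the bias point, one way to double-check the error_wrt_... expressions is to compare them against a numerical finite-difference estimate of the gradient. A minimal sketch (the helper numerical_grad is mine, not part of the original code; loss_fn must return a plain float):

import numpy as np

def numerical_grad(loss_fn, w, eps=1e-6):
    # Central-difference estimate of d(loss)/d(w) for a 1-D weight vector.
    grad = np.zeros_like(w)
    for j in range(w.size):
        w_plus, w_minus = w.copy(), w.copy()
        w_plus[j] += eps
        w_minus[j] -= eps
        grad[j] = (loss_fn(w_plus) - loss_fn(w_minus)) / (2 * eps)
    return grad

# Example usage with the network above, for a single sample (x, y):
# loss_of_w2 = lambda w2: float(.5 * (np.dot(sigmoid(layer1_weights * x + layer1_biases), w2)
#                                     + layer2_biases - y) ** 2)
# print(numerical_grad(loss_of_w2, layer2_weights))   # compare with delta * a_2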