那里。我是一个初学者,正在学习Tensorflow 2.0。我有一个模型,名为3种不同的方法。而且表演是不同的。有人能告诉我为什么会这样吗?
模型构造和调用方法:
import os, sys
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import datasets, models, layers, regularizers, optimizers
def prepare_mnist_features_and_labels(x, y):
x = tf.cast(x, tf.float32)/255.0
y = tf.cast(y, tf.int64)
return x, y
def mninst_dataset():
(x_train, y_train), (x_eval, y_eval) = datasets.mnist.load_data()
print('x_train/y_train shape:', x_train.shape, y_train.shape)
y_train = tf.one_hot(y_train, depth=10)
y_eval = tf.one_hot(y_eval, depth=10)
ds_train = tf.data.Dataset.from_tensor_slices((x_train, y_train))
ds_train = ds_train.map(prepare_mnist_features_and_labels)
ds_train = ds_train.shuffle(x_train.shape[0]).batch(128)
ds_eval = tf.data.Dataset.from_tensor_slices((x_eval, y_eval))
ds_eval = ds_eval.map(prepare_mnist_features_and_labels)
ds_eval = ds_eval.shuffle(x_eval.shape[0]).batch(128)
sample = next(iter(ds_train))
print('sample: ', sample[0].shape, sample[1].shape)
return ds_train, ds_eval
def main():
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
trainset, evalset = mninst_dataset()
model = keras.Sequential()
model.add(layers.Reshape(target_shape=[28, 28, 1], input_shape=[28, 28]))
model.add(layers.Conv2D(filters=32, kernel_size=(5, 5), activation=tf.nn.relu, strides=[1,1], padding="SAME"))
model.add(layers.MaxPool2D(pool_size=(2,2), strides=[1,1], padding="SAME"))
model.add(layers.Conv2D(filters=64, kernel_size=(5, 5), activation=tf.nn.relu, strides=[1,1], padding="SAME"))
model.add(layers.MaxPool2D(pool_size=(2,2), strides=[2,2], padding="SAME"))
model.add(layers.Flatten())
model.add(layers.Dense(units=512, activation=tf.nn.relu, kernel_regularizer=regularizers.l2(0.01)))
model.add(layers.Dense(units=10, activation=tf.nn.relu, kernel_regularizer=regularizers.l2(0.01)))
model.compile(optimizer=optimizers.Adam(lr=0.01), loss=tf.losses.CategoricalCrossentropy(from_logits=True), metrics=['accuracy'])
model.fit(trainset.repeat(), epochs=30, steps_per_epoch=500,
validation_data=evalset.repeat(), validation_steps=10)
if __name__=='__main__':
main()构建模型并运行它的第二种方法如下:
import os, sys
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import datasets, models, layers, regularizers, optimizers
from tqdm import tqdm
def prepare_mnist_features_and_labels(x, y):
x = tf.cast(x, tf.float32)/255.0
y = tf.cast(y, tf.int64)
return x, y
def mnist_dataset():
(x_train, y_train), (x_eval, y_eval) = datasets.mnist.load_data()
# y_train = tf.one_hot(y_train, depth=10)
# y_eval = tf.one_hot(y_eval, depth=10)
ds_train = tf.data.Dataset.from_tensor_slices((x_train, y_train))
ds_train = ds_train.map(prepare_mnist_features_and_labels)
# Test: replace x_train.shape[0] by the number of the training samples, which is 60000
ds_train = ds_train.shuffle(x_train.shape[0]).batch(128)
ds_eval = tf.data.Dataset.from_tensor_slices((x_eval, y_eval))
ds_eval = ds_eval.map(prepare_mnist_features_and_labels)
ds_eval = ds_eval.shuffle(x_eval.shape[0]).batch(128)
# sample = next(iter(ds_train))
# print('sample: ', sample[0].shape, sample[1].shape)
return ds_train, ds_eval
# tf.nn.sparse_softmax_cross_entropy_with_logits(labels, logits, name=None):
# labels: Tensof of shape [d_0, d_1, ..., d_{r-1}]. Each label must be an index in [0, num_classes]
# logits: Unscaled of log probabilities of shape [d_0, d_1, ..., d_{r-1}, num_classes]
# A common use is to have logits of shape [batch_size, num_classes] and have labels of shape [batch_size]
def compute_loss(logits, labels):
# print(logits.numpy())
# print(labels.numpy())
return tf.reduce_mean(
tf.nn.sparse_softmax_cross_entropy_with_logits(
labels=labels, logits=logits
)
)
def compute_accuracy(logits, labels):
predictions = tf.argmax(logits, axis=1)
# print(predictions)
# print(labels)
# print(list(zip(predictions.numpy(), labels.numpy())))
return tf.reduce_mean(tf.cast(tf.equal(predictions, labels), tf.float32))
def train_one_step(model, optimizer, x, y):
# At each train step, first calculate the forward loss
with tf.GradientTape() as tape:
logits = model(x)
loss = compute_loss(logits, y)
# Then calculate the backward gradients over each trainable variables
grads = tape.gradient(loss, model.trainable_variables)
# Optimize and update variables throught backpropagation
optimizer.apply_gradients(zip(grads, model.trainable_variables))
# Compute current model accuracy
accuracy = compute_accuracy(logits, y)
return loss, accuracy
def train(epoch, model, optimizer, trainset):
#def train(epoch, model, optimizer):
# trainset = mnist_dataset()[0]
loss = 0.0
accuracy = 0.0
#for step, (x, y) in enumerate(tqdm(trainset)):
for step, (x, y) in enumerate(tqdm(trainset)):
loss, accuracy = train_one_step(model, optimizer, x, y)
if step % 110 == 0:
print('epoch', epoch, ': loss', loss.numpy(), '; accuracy', accuracy.numpy())
return loss, accuracy
class MyModel(keras.Model):
def __init__(self):
super(MyModel, self).__init__()
self.layer1 = layers.Conv2D(filters=32, kernel_size=(5, 5), activation=tf.nn.relu, strides=[1,1], padding="SAME", input_shape=(-1, 28, 28, 1))
self.layer2 = layers.MaxPool2D(pool_size=(2,2), strides=[1,1], padding="SAME")
self.layer3 = layers.Conv2D(filters=64, kernel_size=(5, 5), activation=tf.nn.relu, strides=[1,1], padding="SAME")
self.layer4 = layers.MaxPool2D(pool_size=(2,2), strides=[2,2], padding="SAME")
self.layer5 = layers.Flatten()
self.layer6 = layers.Dense(units=512, activation=tf.nn.relu, kernel_regularizer=regularizers.l2(0.01))
self.layer7 = layers.Dense(units=10, activation=tf.nn.relu, kernel_regularizer=regularizers.l2(0.01))
def call(self, x, training=False):
x = tf.reshape(x, (-1, 28, 28, 1))
x = self.layer1(x)
x = self.layer2(x)
x = self.layer3(x)
x = self.layer4(x)
x = self.layer5(x)
x = self.layer6(x)
x = self.layer7(x)
return x
def main():
# set random seed
tf.random.set_seed(22)
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
trainset, evalset = mnist_dataset()
model = MyModel()
optimizer = optimizers.Adam(lr=0.001)
# Save checkpoints with keras api as the first approach
# Save checkpoints manually as a second approach.
# find a way to implement early-stopping strategy in the programming style
# for epoch in tqdm(range(30)):
for epoch in range(50):
loss, accuracy = train(epoch, model, optimizer, trainset)
print('Final epoch', epoch, ': loss', loss.numpy(), '; accuracy', accuracy.numpy())
if __name__ == '__main__':
main()最后一种方法如下:
import os, sys
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import datasets, models, layers, regularizers, optimizers
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
def prepare_mnist_features_and_labels(x, y):
x = tf.cast(x, tf.float32)/255.0
y = tf.cast(y, tf.int64)
return x, y
def mnist_dataset():
(x_train, y_train), (x_eval, y_eval) = datasets.mnist.load_data()
print('x_train/y_train shape:', x_train.shape, y_train.shape)
y_train = tf.one_hot(y_train, depth=10)
y_eval = tf.one_hot(y_eval, depth=10)
ds_train = tf.data.Dataset.from_tensor_slices((x_train, y_train))
ds_train = ds_train.map(prepare_mnist_features_and_labels)
ds_train = ds_train.shuffle(x_train.shape[0]).batch(128)
ds_eval = tf.data.Dataset.from_tensor_slices((x_eval, y_eval))
ds_eval = ds_eval.map(prepare_mnist_features_and_labels)
ds_eval = ds_eval.shuffle(x_eval.shape[0]).batch(128)
sample = next(iter(ds_train))
print('sample: ', sample[0].shape, sample[1].shape)
return ds_train, ds_eval
class MyModel(keras.Model):
# self.model = keras.Sequential([
# layers.Reshape(target_shape=(28*28, ), input_shape=(28, 28)),
# layers.Dense(100, activation=tf.nn.relu),
# layers.Dense(100, activation=tf.nn.relu),
# layers.Desnse(10)
# ])
def __init__(self):
super(MyModel, self).__init__()
self.layer1 = layers.Conv2D(filters=32, kernel_size=(5, 5), activation=tf.nn.relu, strides=[1,1], padding="SAME", input_shape=(-1, 28, 28, 1))
self.layer2 = layers.MaxPool2D(pool_size=(2,2), strides=[1,1], padding="SAME")
self.layer3 = layers.Conv2D(filters=64, kernel_size=(5, 5), activation=tf.nn.relu, strides=[1,1], padding="SAME")
self.layer4 = layers.MaxPool2D(pool_size=(2,2), strides=[2,2], padding="SAME")
self.layer5 = layers.Flatten()
self.layer6 = layers.Dense(units=512, activation=tf.nn.relu, kernel_regularizer=regularizers.l2(0.01))
self.layer7 = layers.Dense(units=10, activation=tf.nn.relu, kernel_regularizer=regularizers.l2(0.01))
def call(self, x, training=False):
x = tf.reshape(x, (-1, 28, 28, 1))
x = self.layer1(x)
x = self.layer2(x)
x = self.layer3(x)
x = self.layer4(x)
x = self.layer5(x)
x = self.layer6(x)
x = self.layer7(x)
return x
def main():
tf.random.set_seed(22)
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
trainset, evalset = mnist_dataset()
model = MyModel()
model.compile(optimizer=optimizers.Adam(lr=0.001), loss=tf.losses.CategoricalCrossentropy(from_logits=True), metrics=['accuracy'])
model.fit(trainset.repeat(), epochs=30, steps_per_epoch=500, verbose=1,
validation_data=evalset.repeat(), validation_steps=10)
if __name__ == '__main__':
main()他们每个人都需要一段时间来训练。谁能告诉我为什么表演是不同的?那将来我该如何自己调试呢?
非常感谢你的帮助。
发布于 2019-11-28 15:09:52
仔细检查网络后,问题就解决了。事实证明,模型中最后一个完全连接的层是使用relu函数激活的,这是不合适的。损失函数tf.nn.sparse_softmax_cross_entropy_with_logits和tf.losses.categoricalCrossentropy的选择也有很大的不同。无论选择哪一个,都要确保损失函数与网络的最终输出相匹配。
https://stackoverflow.com/questions/59037441
复制相似问题