首页
学习
活动
专区
圈层
工具
发布
社区首页 >问答首页 >TensorFlow2.0相同的模型结构和超参数导致不同调用方式的性能不同

TensorFlow2.0相同的模型结构和超参数导致不同调用方式的性能不同
EN

Stack Overflow用户
提问于 2019-11-26 01:45:51
回答 1查看 53关注 0票数 0

那里。我是一个初学者,正在学习Tensorflow 2.0。我有一个模型,名为3种不同的方法。而且表演是不同的。有人能告诉我为什么会这样吗?

模型构造和调用方法:

代码语言:javascript
复制
import os, sys

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import datasets, models, layers, regularizers, optimizers


def prepare_mnist_features_and_labels(x, y):
    x = tf.cast(x, tf.float32)/255.0
    y = tf.cast(y, tf.int64)
    return x, y


def mninst_dataset():
    (x_train, y_train), (x_eval, y_eval) = datasets.mnist.load_data()
    print('x_train/y_train shape:', x_train.shape, y_train.shape)
    y_train = tf.one_hot(y_train, depth=10)
    y_eval = tf.one_hot(y_eval, depth=10)

    ds_train = tf.data.Dataset.from_tensor_slices((x_train, y_train))
    ds_train = ds_train.map(prepare_mnist_features_and_labels)
    ds_train = ds_train.shuffle(x_train.shape[0]).batch(128)

    ds_eval = tf.data.Dataset.from_tensor_slices((x_eval, y_eval))
    ds_eval = ds_eval.map(prepare_mnist_features_and_labels)
    ds_eval = ds_eval.shuffle(x_eval.shape[0]).batch(128)

    sample = next(iter(ds_train))
    print('sample: ', sample[0].shape, sample[1].shape)

    return ds_train, ds_eval


def main():
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

    trainset, evalset = mninst_dataset()

    model = keras.Sequential()
    model.add(layers.Reshape(target_shape=[28, 28, 1], input_shape=[28, 28]))
    model.add(layers.Conv2D(filters=32, kernel_size=(5, 5), activation=tf.nn.relu, strides=[1,1], padding="SAME"))
    model.add(layers.MaxPool2D(pool_size=(2,2), strides=[1,1], padding="SAME"))
    model.add(layers.Conv2D(filters=64, kernel_size=(5, 5), activation=tf.nn.relu, strides=[1,1], padding="SAME"))
    model.add(layers.MaxPool2D(pool_size=(2,2), strides=[2,2], padding="SAME"))
    model.add(layers.Flatten())
    model.add(layers.Dense(units=512, activation=tf.nn.relu, kernel_regularizer=regularizers.l2(0.01)))
    model.add(layers.Dense(units=10, activation=tf.nn.relu, kernel_regularizer=regularizers.l2(0.01)))

    model.compile(optimizer=optimizers.Adam(lr=0.01), loss=tf.losses.CategoricalCrossentropy(from_logits=True), metrics=['accuracy'])
    model.fit(trainset.repeat(), epochs=30, steps_per_epoch=500,
              validation_data=evalset.repeat(), validation_steps=10)

if __name__=='__main__':
    main()

构建模型并运行它的第二种方法如下:

代码语言:javascript
复制
    import os, sys

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import datasets, models, layers, regularizers, optimizers

from tqdm import tqdm

def prepare_mnist_features_and_labels(x, y):
    x = tf.cast(x, tf.float32)/255.0
    y = tf.cast(y, tf.int64)
    return x, y


def mnist_dataset():
    (x_train, y_train), (x_eval, y_eval) = datasets.mnist.load_data()
     # y_train = tf.one_hot(y_train, depth=10)
    # y_eval = tf.one_hot(y_eval, depth=10)

    ds_train = tf.data.Dataset.from_tensor_slices((x_train, y_train))
    ds_train = ds_train.map(prepare_mnist_features_and_labels)
    # Test: replace x_train.shape[0] by the number of the training samples, which is 60000
    ds_train = ds_train.shuffle(x_train.shape[0]).batch(128)

    ds_eval = tf.data.Dataset.from_tensor_slices((x_eval, y_eval))
    ds_eval = ds_eval.map(prepare_mnist_features_and_labels)
    ds_eval = ds_eval.shuffle(x_eval.shape[0]).batch(128)

    # sample = next(iter(ds_train))
    # print('sample: ', sample[0].shape, sample[1].shape)

    return ds_train, ds_eval

# tf.nn.sparse_softmax_cross_entropy_with_logits(labels, logits, name=None):
# labels: Tensof of shape [d_0, d_1, ..., d_{r-1}]. Each label must be an index in [0, num_classes]
# logits: Unscaled of log probabilities of shape [d_0, d_1, ..., d_{r-1}, num_classes]
# A common use is to have logits of shape [batch_size, num_classes] and have labels of shape [batch_size]
def compute_loss(logits, labels):
    # print(logits.numpy())
    # print(labels.numpy())
    return tf.reduce_mean(
        tf.nn.sparse_softmax_cross_entropy_with_logits(
             labels=labels, logits=logits
                )
            )


def compute_accuracy(logits, labels):
    predictions = tf.argmax(logits, axis=1)
    # print(predictions)
    # print(labels)
    # print(list(zip(predictions.numpy(), labels.numpy())))
    return tf.reduce_mean(tf.cast(tf.equal(predictions, labels), tf.float32))


def train_one_step(model, optimizer, x, y):
    # At each train step, first calculate the forward loss
    with tf.GradientTape() as tape:
        logits = model(x)
        loss = compute_loss(logits, y)

    # Then calculate the backward gradients over each trainable variables
    grads = tape.gradient(loss, model.trainable_variables)
    # Optimize and update variables throught backpropagation
    optimizer.apply_gradients(zip(grads, model.trainable_variables))

    # Compute current model accuracy
    accuracy = compute_accuracy(logits, y)

    return loss, accuracy


def train(epoch, model, optimizer, trainset):
#def train(epoch, model, optimizer):
    # trainset = mnist_dataset()[0]
    loss = 0.0
    accuracy = 0.0

    #for step, (x, y) in enumerate(tqdm(trainset)):
    for step, (x, y) in enumerate(tqdm(trainset)):
        loss, accuracy = train_one_step(model, optimizer, x, y)

        if step % 110 == 0:
            print('epoch', epoch, ': loss', loss.numpy(), '; accuracy', accuracy.numpy())

    return loss, accuracy


class MyModel(keras.Model):


    def __init__(self):
        super(MyModel, self).__init__()
        self.layer1 = layers.Conv2D(filters=32, kernel_size=(5, 5), activation=tf.nn.relu, strides=[1,1], padding="SAME", input_shape=(-1, 28, 28, 1))
        self.layer2 = layers.MaxPool2D(pool_size=(2,2), strides=[1,1], padding="SAME")
        self.layer3 = layers.Conv2D(filters=64, kernel_size=(5, 5), activation=tf.nn.relu, strides=[1,1], padding="SAME")
        self.layer4 = layers.MaxPool2D(pool_size=(2,2), strides=[2,2], padding="SAME")
        self.layer5 = layers.Flatten()
        self.layer6 = layers.Dense(units=512, activation=tf.nn.relu, kernel_regularizer=regularizers.l2(0.01))
        self.layer7 = layers.Dense(units=10, activation=tf.nn.relu, kernel_regularizer=regularizers.l2(0.01))


    def call(self, x, training=False):
        x = tf.reshape(x, (-1, 28, 28, 1))
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        x = self.layer5(x)
        x = self.layer6(x)
        x = self.layer7(x)
        return x


def main():

    # set random seed
    tf.random.set_seed(22)    

    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

    trainset, evalset = mnist_dataset()

    model = MyModel()

    optimizer = optimizers.Adam(lr=0.001)

    # Save checkpoints with keras api as the first approach
    # Save checkpoints manually as a second approach.
    # find a way to implement early-stopping strategy in the programming style

    # for epoch in tqdm(range(30)):
    for epoch in range(50):
        loss, accuracy = train(epoch, model, optimizer, trainset)

    print('Final epoch', epoch, ': loss', loss.numpy(), '; accuracy', accuracy.numpy())


if __name__ == '__main__':
    main()

最后一种方法如下:

代码语言:javascript
复制
import os, sys

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import datasets, models, layers, regularizers, optimizers

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt


def prepare_mnist_features_and_labels(x, y):
    x = tf.cast(x, tf.float32)/255.0
    y = tf.cast(y, tf.int64)
    return x, y


def mnist_dataset():
    (x_train, y_train), (x_eval, y_eval) = datasets.mnist.load_data()
    print('x_train/y_train shape:', x_train.shape, y_train.shape)
    y_train = tf.one_hot(y_train, depth=10)
    y_eval = tf.one_hot(y_eval, depth=10)

    ds_train = tf.data.Dataset.from_tensor_slices((x_train, y_train))
    ds_train = ds_train.map(prepare_mnist_features_and_labels)
    ds_train = ds_train.shuffle(x_train.shape[0]).batch(128)

    ds_eval = tf.data.Dataset.from_tensor_slices((x_eval, y_eval))
    ds_eval = ds_eval.map(prepare_mnist_features_and_labels)
    ds_eval = ds_eval.shuffle(x_eval.shape[0]).batch(128)

    sample = next(iter(ds_train))
    print('sample: ', sample[0].shape, sample[1].shape)

    return ds_train, ds_eval


class MyModel(keras.Model):

    # self.model = keras.Sequential([
    #     layers.Reshape(target_shape=(28*28, ), input_shape=(28, 28)),
    #     layers.Dense(100, activation=tf.nn.relu),
    #     layers.Dense(100, activation=tf.nn.relu),
    #     layers.Desnse(10)
    # ])
    def __init__(self):
        super(MyModel, self).__init__()
        self.layer1 = layers.Conv2D(filters=32, kernel_size=(5, 5), activation=tf.nn.relu, strides=[1,1], padding="SAME", input_shape=(-1, 28, 28, 1))
        self.layer2 = layers.MaxPool2D(pool_size=(2,2), strides=[1,1], padding="SAME")
        self.layer3 = layers.Conv2D(filters=64, kernel_size=(5, 5), activation=tf.nn.relu, strides=[1,1], padding="SAME")
        self.layer4 = layers.MaxPool2D(pool_size=(2,2), strides=[2,2], padding="SAME")
        self.layer5 = layers.Flatten()
        self.layer6 = layers.Dense(units=512, activation=tf.nn.relu, kernel_regularizer=regularizers.l2(0.01))
        self.layer7 = layers.Dense(units=10, activation=tf.nn.relu, kernel_regularizer=regularizers.l2(0.01))


    def call(self, x, training=False):
        x = tf.reshape(x, (-1, 28, 28, 1))
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        x = self.layer5(x)
        x = self.layer6(x)
        x = self.layer7(x)
        return x


def main():
    tf.random.set_seed(22)

    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

    trainset, evalset = mnist_dataset()

    model = MyModel()
    model.compile(optimizer=optimizers.Adam(lr=0.001), loss=tf.losses.CategoricalCrossentropy(from_logits=True), metrics=['accuracy'])
    model.fit(trainset.repeat(), epochs=30, steps_per_epoch=500, verbose=1,
              validation_data=evalset.repeat(), validation_steps=10)


if __name__ == '__main__':
    main()

他们每个人都需要一段时间来训练。谁能告诉我为什么表演是不同的?那将来我该如何自己调试呢?

非常感谢你的帮助。

EN

回答 1

Stack Overflow用户

回答已采纳

发布于 2019-11-28 15:09:52

仔细检查网络后,问题就解决了。事实证明,模型中最后一个完全连接的层是使用relu函数激活的,这是不合适的。损失函数tf.nn.sparse_softmax_cross_entropy_with_logits和tf.losses.categoricalCrossentropy的选择也有很大的不同。无论选择哪一个,都要确保损失函数与网络的最终输出相匹配。

票数 0
EN
页面原文内容由Stack Overflow提供。腾讯云小微IT领域专用引擎提供翻译支持
原文链接:

https://stackoverflow.com/questions/59037441

复制
相关文章

相似问题

领券
问题归档专栏文章快讯文章归档关键词归档开发者手册归档开发者手册 Section 归档