
Prediction results vary when I predict the same file multiple times with the same checkpoint in an Estimator

Stack Overflow user
Asked on 2019-09-24 15:32:37

I use a TextCNN model inside an Estimator to classify some text. After training, the trained model is stored as checkpoints. However, when I try to predict the same test file with the same checkpoint, the predicted results (probability and logits) vary slightly between runs.

I have already checked the following:

- The dropout keep probability is set to 1 during prediction.
- The checkpoint and the test file stay the same between runs.
- A LoggingTensorHook was used to inspect tensor values during prediction; the values start to differ at the max_pool step (at least the conv values are the same, but I'm not sure). A sketch of how such a hook can be attached follows this list.
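For reference, this is roughly how such a hook can be attached to the PREDICT branch of an Estimator (a sketch under the assumption of TensorFlow 1.x; the hooked tensors are the probabilities and logits produced by the model code below):

import tensorflow as tf

def predict_spec_with_logging(mode, logits):
    # Sketch only (not part of the original question's code): build the
    # PREDICT EstimatorSpec so that probabilities and logits are printed
    # on every prediction step via a LoggingTensorHook, which is how the
    # run-to-run differences described above can be observed.
    probability = tf.nn.softmax(logits, axis=1, name="probability")
    predictions = {
        "class": tf.argmax(probability, axis=1, name="predictions"),
        "probability": probability,
    }
    debug_hook = tf.train.LoggingTensorHook(
        {"probability": probability, "logits": logits}, every_n_iter=1)
    return tf.estimator.EstimatorSpec(
        mode=mode, predictions=predictions, prediction_hooks=[debug_hook])

The full model and input-pipeline code from the question is below.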

import tensorflow as tf

def line_parser(line, vocab):
    def parse_content(record):
        items = record.decode().strip().split()
        cat = int(items[-1])
        tokens = items[:-1]
        token_length = len(tokens)
        if token_length > FLAGS.max_sequence_length:
            tokens = tokens[:FLAGS.max_sequence_length]
        if token_length < FLAGS.max_sequence_length:
            tokens += [FLAGS.pad_word]*(FLAGS.max_sequence_length-token_length)
        return [tokens, cat]

    result = tf.py_func(parse_content, [line], [tf.string, tf.int64])
    ids = vocab.lookup(result[0])
    ids = tf.cast(ids, tf.int64)
    ids = tf.reshape(ids, [FLAGS.max_sequence_length])
    label = tf.one_hot(result[1], FLAGS.num_classes, dtype=tf.int32)
    return [ids, label]


def predict_line_parser(line, vocab):
    def parse_content(record):
        feature = record.decode().strip()
        tokens = feature.split()
        token_length = len(tokens)
        if token_length > FLAGS.max_sequence_length:
            tokens = tokens[:FLAGS.max_sequence_length]
        if token_length < FLAGS.max_sequence_length:
            tokens += [FLAGS.pad_word]*(FLAGS.max_sequence_length-token_length)
        return [tokens]

    result = tf.py_func(parse_content, [line], [tf.string])
    ids = vocab.lookup(result[0])
    ids = tf.cast(ids, tf.int64)
    ids = tf.reshape(ids, [FLAGS.max_sequence_length])
    return ids


def train_input_fn(file_paths, batch_size):
    vocab = tf.contrib.lookup.index_table_from_file(FLAGS.vocab_path)
    dataset = tf.data.TextLineDataset(file_paths)
    dataset = dataset.map(lambda line: line_parser(line, vocab))
    dataset = dataset.shuffle(1000)
    dataset = dataset.batch(batch_size).repeat()
    return dataset


def eval_input_fn(file_paths, batch_size):
    vocab = tf.contrib.lookup.index_table_from_file(FLAGS.vocab_path)
    dataset = tf.data.TextLineDataset(file_paths)
    dataset = dataset.map(lambda line: line_parser(line, vocab))
    dataset = dataset.batch(batch_size=batch_size)
    return dataset


def predict_input_fn(file_paths, batch_size):
    vocab = tf.contrib.lookup.index_table_from_file(FLAGS.vocab_path)
    dataset = tf.data.TextLineDataset(file_paths)
    dataset = dataset.map(lambda line:predict_line_parser(line, vocab))
    dataset = dataset.batch(batch_size=batch_size)
    return dataset


def create_model(features, params):
    # projection from sentence with id to embedding
    embedding_inputs = tf.nn.embedding_lookup(params["embedding"], features)
    embedding_inputs = tf.expand_dims(embedding_inputs, axis=-1)
    l2_loss = tf.constant(0.0, name="l2_loss", dtype="float64")

    # convolutional layer and pooling layer
    pooled_outputs = list()
    for i, filter_size in enumerate(params["filter_sizes"]):
        with tf.name_scope("conv_{}".format(filter_size)):
            filter_shape = [filter_size, params["embedding_size"], 1, params["num_filters"]]
            W = tf.Variable(tf.truncated_normal(filter_shape, stddev=0.1, dtype="float64"), name="W")
            b = tf.Variable(tf.constant(0.1, shape=[params["num_filters"]], dtype="float64"), name="b")
            conv = tf.nn.conv2d(embedding_inputs, W, strides=[1, 1, 1, 1], padding="VALID", use_cudnn_on_gpu=True,
                                name="conv".format(filter_size))
            h = tf.nn.relu(tf.nn.bias_add(conv, b), name="relu_{}".format(filter_size))
            pooled = tf.nn.max_pool(
                h,
                ksize=[1, params["sequence_length"] - filter_size + 1, 1, 1],
                strides=[1, 1, 1, 1],
                padding="VALID",
                name="pool".format(filter_size)
            )
            pooled_outputs.append(pooled)

    # concatenate all feature vector
    number_filters_total = params["num_filters"] * len(params["filter_sizes"])
    h_pool = tf.concat(pooled_outputs, 3)
    h_pool_flat = tf.reshape(h_pool, [-1, number_filters_total])

    # dropout
    with tf.name_scope("dropout"):
        # h_drop = tf.nn.dropout(h_pool_flat, params["dropout_keep_prob"])
        h_drop = tf.nn.dropout(h_pool_flat, 1)

    # fully connected layer
    with tf.name_scope("output"):
        W = tf.Variable(
            tf.truncated_normal(shape=[number_filters_total, params["num_classes"]], stddev=0.1, dtype="float64"),
            name="W")
        b = tf.Variable(tf.constant(0.1, shape=[params["num_classes"]], dtype="float64"), name="b")
        l2_loss += tf.nn.l2_loss(W)
        l2_loss += tf.nn.l2_loss(b)
        logits = tf.nn.xw_plus_b(h_drop, W, b, name="scores")
    return logits, l2_loss


def model_fn_builder():
    def text_cnn_model_fn(features, labels, mode, params):
        logits, l2_loss = create_model(features, params)
        # train mode branch
        if mode == tf.estimator.ModeKeys.TRAIN:
            # loss
            with tf.name_scope("loss"):
                losses = tf.nn.softmax_cross_entropy_with_logits_v2(labels=labels, logits=logits)
                loss = tf.reduce_mean(losses) + params["l2_reg_lambda"] * l2_loss
            # optimizer function
            with tf.name_scope("optimizer"):
                optimizer = tf.train.AdamOptimizer(params["learning_rate"])
                grads_and_vars = optimizer.compute_gradients(loss)
                train_op = optimizer.apply_gradients(grads_and_vars, global_step=tf.train.get_global_step())
            return tf.estimator.EstimatorSpec(mode=mode, loss=loss, train_op=train_op)

        # eval mode branch
        if mode == tf.estimator.ModeKeys.EVAL:
            # loss
            with tf.name_scope("loss"):
                losses = tf.nn.softmax_cross_entropy_with_logits_v2(labels=labels, logits=logits)
                loss = tf.reduce_mean(losses) + params["l2_reg_lambda"] * l2_loss

            # predictions
            with tf.name_scope("prediction"):
                probability = tf.nn.softmax(logits, axis=1, name="probability")
                pred = tf.argmax(probability, axis=1, name="predictions")

            # metrics
            with tf.name_scope("metrics"):
                accuracy = tf.metrics.accuracy(labels=tf.argmax(labels, axis=1), predictions=pred)
                precision = tf.metrics.precision(labels=tf.argmax(labels, axis=1), predictions=pred)
                recall = tf.metrics.recall(labels=tf.argmax(labels, axis=1), predictions=pred)
                tf.summary.scalar("accuracy", accuracy[1])
                tf.summary.scalar("precision", precision[1])
                tf.summary.scalar("recall", recall[1])
                tf.summary.scalar("loss", loss)
                metrics = {"accuracy": accuracy, "precision": precision, "recall": recall}
                metric_hook = tf.train.LoggingTensorHook(
                    {"f1-score": 2 * precision[1] * recall[1] / (precision[1] + recall[1]), "precision": precision[1],
                     "recall": recall[1]}, every_n_iter=100)

            return tf.estimator.EstimatorSpec(mode=mode, loss=loss, eval_metric_ops=metrics,
                                              evaluation_hooks=[metric_hook])

        # predict mode branch
        if mode == tf.estimator.ModeKeys.PREDICT:
            # predictions
            with tf.name_scope("prediction"):
                probability = tf.nn.softmax(logits, axis=1, name="probability")
                pred = tf.argmax(probability, axis=1, name="predictions")
                predictions = {
                    "class": pred,
                    "probability": probability,
                }
            return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)

    return text_cnn_model_fn
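The driver code that builds the Estimator and calls predict is not included in the question. A minimal sketch of what it presumably looks like is shown here; the flag names, filter sizes, and especially the embedding matrix built with NumPy outside the graph are assumptions (the last point is what the accepted answer turns out to hinge on):

import numpy as np
import tensorflow as tf

# Hypothetical driver, for illustration only.  The key assumption is that
# the embedding matrix is a plain NumPy array created outside the graph and
# handed to the model through `params`, so it is not stored in the
# checkpoint and is drawn anew on every run of the script.
vocab_size = 50000  # assumed; in practice derived from FLAGS.vocab_path

params = {
    "embedding": np.random.uniform(-1.0, 1.0, size=(vocab_size, 128)),
    "embedding_size": 128,
    "filter_sizes": [3, 4, 5],
    "num_filters": 100,
    "sequence_length": FLAGS.max_sequence_length,
    "num_classes": FLAGS.num_classes,
    "l2_reg_lambda": 0.1,
    "learning_rate": 1e-3,
}

estimator = tf.estimator.Estimator(
    model_fn=model_fn_builder(),
    model_dir=FLAGS.model_dir,      # assumed flag
    params=params)

for result in estimator.predict(
        input_fn=lambda: predict_input_fn(FLAGS.test_file, batch_size=64)):
    print(result["class"], result["probability"])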

I expected the two prediction runs to produce identical output, but they differ as shown below:

First run
0.02336916147480053\0.02336916147480053
0.29461604884471243\0.29461604884471243
0.04555523004833724\0.04555523004833724
1\0.5450933830551228
0.042727966035733034\0.042727966035733034
0.032764190484837884\0.032764190484837884
0.11542703615898613\0.11542703615898613
0.12662708812885717\0.12662708812885717
0.01605587344580832\0.01605587344580832
0.006454832043875243\0.006454832043875243

Second run
0.03389085341620636\0.03389085341620636
0.31563690653966603\0.31563690653966603
0.06185060165562852\0.06185060165562852
1\0.5891016184323346
0.07184752629327144\0.07184752629327144
0.04355442431024522\0.04355442431024522
0.16290306166502935\0.16290306166502935
0.17214872864042816\0.17214872864042816
0.02437323886282706\0.02437323886282706
0.0109889405648392\0.0109889405648392


1 Answer

Stack Overflow user
Accepted answer
Answered on 2019-09-26 02:29:20

I solved this problem a while ago. The variation was caused by the word-embedding vectors, which were being randomly generated anew on each run.
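In other words, the embedding matrix was apparently created outside the graph (for example as a freshly drawn NumPy array passed in through params), so it was never written to the checkpoint and was re-randomized on every predict run. One way to make predictions reproducible is to define the embedding as a graph variable inside create_model, so that it is trained, saved, and restored together with the rest of the model. A minimal sketch, assuming a params["vocab_size"] entry is available:

# Sketch: replace the params["embedding"] lookup at the top of
# create_model() with a variable that lives in the graph, so its values
# are written to and restored from the checkpoint.
with tf.variable_scope("embedding", reuse=tf.AUTO_REUSE):
    embedding_table = tf.get_variable(
        "table",
        shape=[params["vocab_size"], params["embedding_size"]],
        dtype=tf.float64,
        initializer=tf.random_uniform_initializer(-1.0, 1.0))
embedding_inputs = tf.nn.embedding_lookup(embedding_table, features)
embedding_inputs = tf.expand_dims(embedding_inputs, axis=-1)

Alternatively, loading pre-trained word vectors or fixing the NumPy/TensorFlow seeds would also make runs comparable, but keeping the embedding inside the checkpoint is the more direct fix.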

Original question: https://stackoverflow.com/questions/58083713
