我正在尝试学习关于 XLA 和 JIT 的教程(https://www.tensorflow.org/performance/xla/jit)。根据 https://www.tensorflow.org/performance/xla/jit#step_3_run_with_xla 的说明,当我运行该步骤中的命令时,
它应该生成一个输出,其中包含 XLA 图的位置。但是,我的输出不包括此信息:
Extracting /tmp/tensorflow/mnist/input_data/train-images-idx3-ubyte.gz
Extracting /tmp/tensorflow/mnist/input_data/train-labels-idx1-ubyte.gz
Extracting /tmp/tensorflow/mnist/input_data/t10k-images-idx3-ubyte.gz
Extracting /tmp/tensorflow/mnist/input_data/t10k-labels-idx1-ubyte.gz
0.9172
只生成了时间线文件。
构建版本:启用了 CPU 版 XLA JIT 的 TensorFlow r1.3
发布于 2017-12-12 13:36:13
这里的关键是命令的这一部分:
TF_XLA_FLAGS=--xla_generate_hlo_graph=.*
完整的命令应该是:
TF_XLA_FLAGS=--xla_generate_hlo_graph=.* python mnist_softmax_xla.py
这样,您应该会看到许多类似下面的输出行:
I tensorflow/compiler/xla/service/hlo_graph_dumper.cc:1254] computation cluster_1[_XlaCompiledKernel=true,_XlaNumConstantArgs=0,_XlaNumResourceArgs=0].v31 [GPU-ir-emit-prepare: after flatten-call-graph, pipeline end]: /tmp/hlo_graph_67.5dOpgX.dot
注意:我是在 1.4 而不是 1.3 上测试的。
发布于 2018-08-15 06:19:57
完整的命令应该是
TF_XLA_FLAGS="--xla_hlo_graph_path=./tmp_dot --xla_generate_hlo_graph=.*" python mnist_softmax_xla.py
从源代码构建 TensorFlow 时,请确保启用了 XLA 选项。此外,还需要显式请求 XLA 设备,即:
with tf.device("/job:localhost/replica:0/task:0/device:XLA_CPU:0"):
另外,教程给出的示例使用的是 tf.Variable,应该将其替换为 tf.get_variable。
完整的代码将如下所示:
# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# ==============================================================================
"""Simple MNIST classifier example with JIT XLA and timelines.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import argparse
import sys
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
from tensorflow.python.client import timeline
FLAGS = None
def main(_):
  """Train and evaluate a softmax MNIST classifier, optionally with XLA JIT.

  Builds a single-layer softmax model, runs 1000 steps of gradient descent
  with batches of 100 examples, writes a Chrome trace of the final training
  step to ``timeline.ctf.json`` and prints the accuracy on the test set.

  Reads the module-level ``FLAGS``: ``FLAGS.data_dir`` is where the MNIST
  data is read from, and ``FLAGS.xla`` selects whether the global JIT level
  is set to ``ON_1``.

  Args:
    _: Unused positional argument supplied by ``tf.app.run``.
  """
  # NOTE(review): the indentation of this snippet was lost when it was
  # pasted, so the exact extent of the device scope is an assumption. Here
  # the whole body is placed inside it -- confirm against the original.
  with tf.device("/job:localhost/replica:0/task:0/device:XLA_CPU:0"):
    # Import data
    mnist = input_data.read_data_sets(FLAGS.data_dir)

    # Create the model
    x = tf.placeholder(tf.float32, [None, 784])
    w = tf.get_variable(
        "w", initializer=tf.zeros([784, 10]), use_resource=True)
    b = tf.get_variable("b", initializer=tf.zeros([10]), use_resource=True)
    y = tf.matmul(x, w) + b

    # Define loss and optimizer
    y_ = tf.placeholder(tf.int64, [None])

    # The raw formulation of cross-entropy,
    #
    #   tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(tf.nn.softmax(y)),
    #                                 reduction_indices=[1]))
    #
    # can be numerically unstable.
    #
    # So here we use tf.losses.sparse_softmax_cross_entropy on the raw
    # logit outputs of 'y', and then average across the batch.
    cross_entropy = tf.losses.sparse_softmax_cross_entropy(
        labels=y_, logits=y)
    train_step = tf.train.GradientDescentOptimizer(0.5).minimize(
        cross_entropy)

    config = tf.ConfigProto()
    jit_level = 0
    if FLAGS.xla:
      # Turns on XLA JIT compilation.
      jit_level = tf.OptimizerOptions.ON_1
    config.graph_options.optimizer_options.global_jit_level = jit_level

    run_metadata = tf.RunMetadata()

    # The context manager closes the session even when a training or
    # evaluation step raises, which a trailing sess.close() would not.
    with tf.Session(config=config) as sess:
      tf.global_variables_initializer().run(session=sess)

      # Train
      train_loops = 1000
      for i in range(train_loops):
        batch_xs, batch_ys = mnist.train.next_batch(100)

        # Create a timeline for the last loop and export to json to view
        # with chrome://tracing/.
        if i == train_loops - 1:
          sess.run(train_step,
                   feed_dict={x: batch_xs,
                              y_: batch_ys},
                   options=tf.RunOptions(
                       trace_level=tf.RunOptions.FULL_TRACE),
                   run_metadata=run_metadata)
          trace = timeline.Timeline(step_stats=run_metadata.step_stats)
          with open('timeline.ctf.json', 'w') as trace_file:
            trace_file.write(trace.generate_chrome_trace_format())
        else:
          sess.run(train_step, feed_dict={x: batch_xs, y_: batch_ys})

      # Test trained model
      correct_prediction = tf.equal(tf.argmax(y, 1), y_)
      accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
      print(sess.run(accuracy,
                     feed_dict={x: mnist.test.images,
                                y_: mnist.test.labels}))
def parse_bool_flag(value):
  """Convert a command-line string to a bool.

  ``argparse`` with ``type=bool`` calls ``bool()`` on the raw string, so any
  non-empty text -- including "False" -- becomes True. This parser maps the
  usual spellings explicitly instead.

  Args:
    value: The raw argument text, or an existing bool.

  Returns:
    True for "true", "t", "yes", "y" and "1"; False for "false", "f", "no",
    "n", "0" and the empty string. Matching ignores case and surrounding
    whitespace. A bool is returned unchanged.

  Raises:
    argparse.ArgumentTypeError: If the text is not a recognised spelling.
  """
  if isinstance(value, bool):
    return value
  lowered = value.strip().lower()
  if lowered in ('true', 't', 'yes', 'y', '1'):
    return True
  # The empty string stays False, matching what bool('') returned before.
  if lowered in ('false', 'f', 'no', 'n', '0', ''):
    return False
  raise argparse.ArgumentTypeError(
      'expected a boolean value, got %r' % (value,))


if __name__ == '__main__':
  parser = argparse.ArgumentParser()
  parser.add_argument(
      '--data_dir',
      type=str,
      default='/tmp/tensorflow/mnist/input_data',
      help='Directory for storing input data')
  parser.add_argument(
      '--xla', type=parse_bool_flag, default=True,
      help='Turn xla via JIT on')
  # Arguments this parser does not recognise are forwarded to tf.app.run.
  FLAGS, unparsed = parser.parse_known_args()
  tf.app.run(main=main, argv=[sys.argv[0]] + unparsed)
# Source: https://stackoverflow.com/questions/45681405
复制相似问题