Merge commit for internal changes
diff --git a/RELEASE.md b/RELEASE.md
index e04bd3f..97c1a8c 100644
--- a/RELEASE.md
+++ b/RELEASE.md
@@ -1,3 +1,67 @@
+# Release 1.5.0
+
+## Breaking Changes
+* Prebuilt binaries are now built against CUDA 9 and cuDNN 7.
+* Our Linux binaries are built using Ubuntu 16 containers, potentially
+  introducing glibc incompatibility issues with Ubuntu 14.
+* Starting with the 1.6 release, our prebuilt binaries will use AVX
+  instructions. This may break TF on older CPUs.
+
+## Major Features And Improvements
+* [Eager execution](https://github.com/tensorflow/tensorflow/tree/r1.5/tensorflow/contrib/eager)
+  preview version is now available; a short sketch follows this list.
+* [TensorFlow Lite](https://github.com/tensorflow/tensorflow/tree/r1.5/tensorflow/contrib/lite)
+ dev preview is now available.
+* CUDA 9 and cuDNN 7 support.
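+
+A minimal sketch of the eager preview (assuming the experimental contrib entry
+point `tf.contrib.eager.enable_eager_execution()`, which may change between
+releases):
+
+```python
+import tensorflow as tf
+import tensorflow.contrib.eager as tfe
+
+tfe.enable_eager_execution()  # ops now execute immediately; no tf.Session
+x = tf.constant([[1.0, 2.0]])
+print(tf.matmul(x, tf.transpose(x)))  # prints the computed 1x1 result
+```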
+
+## Bug Fixes and Other Changes
+* `auto_correlation` added to `tf.contrib.distributions`.
+* Add `DenseFlipout` probabilistic layer.
+* Restandardize `DenseVariational` as a simpler template for other probabilistic layers.
+* Make `tf.contrib.distributions` QuadratureCompound classes support batching.
+* `Stream::BlockHostUntilDone` now returns Status rather than bool.
+* Customize request timeouts for the GCS filesystem.
+
+## Thanks to our Contributors
+
+This release contains contributions from many people at Google, as well as:
+
+4d55397500, Abdullah Alrasheed, abenmao, Adam Salvail, Aditya Dhulipala, Ag Ramesh,
+Akimasa Kimura, Alan Du, Alan Yee, Alexander, Amit Kushwaha, Amy, Andrei Costinescu,
+Andrei Nigmatulin, Andrew Erlichson, Andrew Myers, Andrew Stepanov, Androbin, AngryPowman,
+Anish Shah, Anton Daitche, Artsiom Chapialiou, asdf2014, Aseem Raj Baranwal, Ash Hall,
+Bart Kiers, Batchu Venkat Vishal, ben, Ben Barsdell, Bill Piel, Carl Thomé, Catalin Voss,
+Changming Sun, Chengzhi Chen, Chi Zeng, Chris Antaki, Chris Donahue, Chris Oelmueller,
+Chris Tava, Clayne Robison, Codrut, Courtial Florian, Dalmo Cirne, Dan J, Darren Garvey,
+David Kristoffersson, David Norman, David Röthlisberger, DavidNorman, Dhruv, DimanNe,
+Dorokhov, Duncan Mac-Vicar P, EdwardDixon, EMCP, error.d, FAIJUL, Fan Xia,
+Francois Xavier, Fred Reiss, "Freedom" Koan-Sin Tan, Fritz Obermeyer, Gao, Xiang,
+Guenther Schmuelling, Guo Yejun (郭叶军), Hans Gaiser, HectorSVC, Hyungsuk Yoon,
+James Pruegsanusak, Jay Young, Jean Wanka, Jeff Carpenter, Jeremy Rutman, Jeroen BéDorf,
+Jett Jones, Jimmy Jia, jinghuangintel, jinze1994, JKurland, Joel Hestness, joetoth,
+John B Nelson, John Impallomeni, John Lawson, Jonas, Jonathan Dekhtiar, joshkyh, Jun Luan,
+Jun Mei, Kai Sasaki, Karl Lessard, karl@kubx.ca, Kb Sriram, Kenichi Ueno, Kevin Slagle,
+Kongsea, Lakshay Garg, lhlmgr, Lin Min, liu.guangcong, Loki Der Quaeler, Louie Helm,
+lucasmoura, Luke Iwanski, Lyndon White, Mahmoud Abuzaina, Marcel Puyat, Mark Aaron Shirley,
+Michele Colombo, MtDersvan, Namrata-Ibm, Nathan Luehr, Naurril, Nayana Thorat, Nicolas Lopez,
+Niranjan Hasabnis, Nolan Liu, Nouce, Oliver Hennigh, osdamv, Patrik Erdes,
+Patryk Chrabaszcz, Pavel Christof, Penghao Cen, postBG, Qingqing Cao, Qingying Chen, qjivy,
+Raphael, Rasmi, raymondxyang, Renze Yu, resec, Roffel, Ruben Vereecken, Ryohei Kuroki,
+sandipmgiri, Santiago Castro, Scott Kirkland, Sean Vig, Sebastian Raschka, Sebastian Weiss,
+Sergey Kolesnikov, Sergii Khomenko, Shahid, Shivam Kotwalia, Stuart Berg, Sumit Gouthaman,
+superzerg, Sven Mayer, tetris, Ti Zhou, Tiago Freitas Pereira, Tian Jin, Tomoaki Oiki,
+Vaibhav Sood, vfdev, Vivek Rane, Vladimir Moskva, wangqr, Weber Xie, Will Frey,
+Yan Facai (颜发才), yanivbl6, Yaroslav Bulatov, Yixing Lao, Yong Tang, youkaichao,
+Yuan (Terry) Tang, Yue Zhang, Yuxin Wu, Ziming Dong, ZxYuan, 黄璞
+
+We are also grateful to all who filed issues or helped resolve them, asked and
+answered questions, and were part of inspiring discussions.
+
+# Release 1.4.1
+
+## Bug Fixes and Other Changes
+* `LinearClassifier` fix for CloudML Engine.
+
# Release 1.4.0
## Major Features And Improvements
diff --git a/configure.py b/configure.py
index 7537e30..cf16ef4 100644
--- a/configure.py
+++ b/configure.py
@@ -302,6 +302,12 @@
Returns:
boolean value of the variable.
+
+ Raises:
+ UserInputError: if an environment variable is set, but it cannot be
+ interpreted as a boolean indicator, assume that the user has made a
+ scripting error, and will continue to provide invalid input.
+ Raise the error to avoid infinitely looping.
"""
if not question:
question = 'Do you wish to build TensorFlow with %s support?' % query_item
@@ -319,6 +325,23 @@
question += ' [y/N]: '
var = environ_cp.get(var_name)
+ if var is not None:
+ var_content = var.strip().lower()
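+    # Map common boolean spellings to True/False; any other value is treated
+    # as a scripting error and reported via UserInputError below.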
+ true_strings = ('1', 't', 'true', 'y', 'yes')
+ false_strings = ('0', 'f', 'false', 'n', 'no')
+ if var_content in true_strings:
+ var = True
+ elif var_content in false_strings:
+ var = False
+ else:
+ raise UserInputError(
+ 'Environment variable %s must be set as a boolean indicator.\n'
+ 'The following are accepted as TRUE : %s.\n'
+ 'The following are accepted as FALSE: %s.\n'
+ 'Current value is %s.' % (
+ var_name, ', '.join(true_strings), ', '.join(false_strings),
+ var))
+
while var is None:
user_input_origin = get_input(question)
user_input = user_input_origin.strip().lower()
@@ -605,8 +628,9 @@
Raises:
UserInputError: if a query has been attempted n_ask_attempts times without
- success, assume that the user has made a scripting error, and will continue
- to provide invalid input. Raise the error to avoid infinitely looping.
+      success. We assume the user has made a scripting error and would
+      otherwise keep supplying invalid input, so the error is raised to
+      avoid looping infinitely.
"""
default = environ_cp.get(var_name) or var_default
full_query = '%s [Default is %s]: ' % (
@@ -1101,11 +1125,13 @@
def set_trisycl_include_dir(environ_cp):
"""Set TRISYCL_INCLUDE_DIR."""
+
ask_trisycl_include_dir = ('Please specify the location of the triSYCL '
'include directory. (Use --config=sycl_trisycl '
'when building with Bazel) '
'[Default is %s]: '
) % (_DEFAULT_TRISYCL_INCLUDE_DIR)
+
while True:
trisycl_include_dir = get_from_env_or_user_or_default(
environ_cp, 'TRISYCL_INCLUDE_DIR', ask_trisycl_include_dir,
diff --git a/tensorflow/BUILD b/tensorflow/BUILD
index 06c9c2b..808bd0c 100644
--- a/tensorflow/BUILD
+++ b/tensorflow/BUILD
@@ -386,6 +386,14 @@
visibility = ["//tensorflow:__subpackages__"],
)
+py_library(
+ name = "tensorflow_py",
+ srcs = ["__init__.py"],
+ srcs_version = "PY2AND3",
+ visibility = ["//visibility:public"],
+ deps = ["//tensorflow/python"],
+)
+
filegroup(
name = "all_opensource_files",
data = [
@@ -653,6 +661,9 @@
"//tensorflow/tools/quantization:all_files",
"//tensorflow/tools/test:all_files",
"//tensorflow/user_ops:all_files",
+ "//third_party/eigen3:all_files",
+ "//third_party/fft2d:all_files",
+ "//third_party/flatbuffers:all_files",
"//third_party/hadoop:all_files",
"//third_party/sycl:all_files",
"//third_party/sycl/sycl:all_files",
@@ -791,11 +802,3 @@
"tf_exported_symbols.lds",
],
)
-
-py_library(
- name = "tensorflow_py",
- srcs = ["__init__.py"],
- srcs_version = "PY2AND3",
- visibility = ["//visibility:public"],
- deps = ["//tensorflow/python"],
-)
diff --git a/tensorflow/compiler/xla/service/BUILD b/tensorflow/compiler/xla/service/BUILD
index c8afb7d..7652b49 100644
--- a/tensorflow/compiler/xla/service/BUILD
+++ b/tensorflow/compiler/xla/service/BUILD
@@ -90,8 +90,6 @@
":shape_inference",
"//tensorflow/compiler/xla:literal_util",
"//tensorflow/compiler/xla:shape_util",
- "//tensorflow/compiler/xla:status",
- "//tensorflow/compiler/xla:status_macros",
"//tensorflow/compiler/xla:statusor",
"//tensorflow/compiler/xla:types",
"//tensorflow/compiler/xla:util",
diff --git a/tensorflow/contrib/data/python/kernel_tests/BUILD b/tensorflow/contrib/data/python/kernel_tests/BUILD
index c22f83a..3fe7e51 100644
--- a/tensorflow/contrib/data/python/kernel_tests/BUILD
+++ b/tensorflow/contrib/data/python/kernel_tests/BUILD
@@ -117,7 +117,6 @@
py_library(
name = "dataset_serialization_test",
- testonly = 1,
srcs = [
"dataset_serialization_test_base.py",
],
diff --git a/tensorflow/contrib/eager/python/examples/linear_regression/linear_regression.py b/tensorflow/contrib/eager/python/examples/linear_regression/linear_regression.py
index 7bc5007..f4b7d67 100644
--- a/tensorflow/contrib/eager/python/examples/linear_regression/linear_regression.py
+++ b/tensorflow/contrib/eager/python/examples/linear_regression/linear_regression.py
@@ -41,7 +41,7 @@
For those familiar with TensorFlow graphs, notice the absence of
`tf.Session`. The `forward()` method here immediately executes and
returns output values. The `loss()` method immediately compares the
- output of `forward()` with the target adn returns the MSE loss value.
+ output of `forward()` with the target and returns the MSE loss value.
The `fit()` performs gradient-descent training on the model's weights
and bias.
"""
diff --git a/tensorflow/contrib/eager/python/examples/mnist/mnist.py b/tensorflow/contrib/eager/python/examples/mnist/mnist.py
index bb121c7..82b3d39 100644
--- a/tensorflow/contrib/eager/python/examples/mnist/mnist.py
+++ b/tensorflow/contrib/eager/python/examples/mnist/mnist.py
@@ -40,7 +40,7 @@
"""MNIST Network.
Network structure is equivalent to:
- https://github.com/tensorflow/tensorflow/blob/r1.4/tensorflow/examples/tutorials/mnist/mnist_deep.py
+ https://github.com/tensorflow/tensorflow/blob/r1.5/tensorflow/examples/tutorials/mnist/mnist_deep.py
and
https://github.com/tensorflow/models/blob/master/tutorials/image/mnist/convolutional.py
diff --git a/tensorflow/contrib/libsvm/kernels/decode_libsvm_op.cc b/tensorflow/contrib/libsvm/kernels/decode_libsvm_op.cc
index 616240f..720c74e 100644
--- a/tensorflow/contrib/libsvm/kernels/decode_libsvm_op.cc
+++ b/tensorflow/contrib/libsvm/kernels/decode_libsvm_op.cc
@@ -46,34 +46,47 @@
std::vector<T> out_values;
std::vector<std::pair<int64, int64>> out_indices;
for (int i = 0; i < input_flat.size(); ++i) {
- std::vector<string> entries =
- str_util::Split(input_flat(i), " ", str_util::SkipEmpty());
- OP_REQUIRES(ctx, !entries.empty(),
- errors::InvalidArgument("No entries found for input[", i,
+ StringPiece line(input_flat(i));
+ str_util::RemoveWhitespaceContext(&line);
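+      // Each input line has the LIBSVM form "<label> <index>:<value> ...";
+      // the label is parsed first, then each index:value feature pair.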
+
+ StringPiece piece;
+ OP_REQUIRES(ctx, str_util::ConsumeNonWhitespace(&line, &piece),
+ errors::InvalidArgument("No label found for input[", i,
"]: \"", input_flat(i), "\""));
+
Tlabel label_value;
- OP_REQUIRES(
- ctx, strings::SafeStringToNumeric<Tlabel>(entries[0], &label_value),
- errors::InvalidArgument("Label format incorrect: ", entries[0]));
+ OP_REQUIRES(ctx,
+ strings::SafeStringToNumeric<Tlabel>(piece, &label_value),
+ errors::InvalidArgument("Label format incorrect: ", piece));
+
label(i) = label_value;
- for (int j = 1; j < entries.size(); j++) {
- std::vector<string> pair = str_util::Split(entries[j], ":");
- OP_REQUIRES(
- ctx, (pair.size() == 2),
- errors::InvalidArgument("Invalid feature \"", entries[j], "\""));
+
+ str_util::RemoveLeadingWhitespace(&line);
+ while (str_util::ConsumeNonWhitespace(&line, &piece)) {
+ size_t p = piece.find(':');
+ OP_REQUIRES(ctx, (p != StringPiece::npos),
+ errors::InvalidArgument("Invalid feature \"", piece, "\""));
+
int64 feature_index;
OP_REQUIRES(
- ctx, strings::safe_strto64(pair[0].c_str(), &feature_index),
- errors::InvalidArgument("Feature format incorrect: ", entries[j]));
+ ctx, strings::safe_strto64(piece.substr(0, p), &feature_index),
+ errors::InvalidArgument("Feature format incorrect: ", piece));
OP_REQUIRES(ctx, (feature_index >= 0),
errors::InvalidArgument(
"Feature index should be >= 0, got ", feature_index));
+
T feature_value;
OP_REQUIRES(
- ctx, strings::SafeStringToNumeric<T>(pair[1], &feature_value),
- errors::InvalidArgument("Feature format incorrect: ", entries[j]));
+          ctx,
+ strings::SafeStringToNumeric<T>(piece.substr(p + 1),
+ &feature_value),
+ errors::InvalidArgument("Feature format incorrect: ", piece));
+
out_values.emplace_back(feature_value);
out_indices.emplace_back(std::pair<int64, int64>(i, feature_index));
+
+ str_util::RemoveLeadingWhitespace(&line);
}
}
diff --git a/tensorflow/contrib/makefile/README.md b/tensorflow/contrib/makefile/README.md
index 9345303..0613de2 100644
--- a/tensorflow/contrib/makefile/README.md
+++ b/tensorflow/contrib/makefile/README.md
@@ -262,6 +262,14 @@
#### Optimization
+The `build_all_ios.sh` script can take optional command-line arguments to
+selectively register only the operators used in your graph.
+
+```bash
+tensorflow/contrib/makefile/build_all_ios.sh -a arm64 -g $HOME/graphs/inception/tensorflow_inception_graph.pb
+```
+Please note this is an aggressive optimization of the operators: the resulting
+library may not work with other graphs, but it will reduce the size of the
+final library.
+
The `compile_ios_tensorflow.sh` script can take optional command-line arguments.
The first argument will be passed as a C++ optimization flag and defaults to
debug mode. If you are concerned about performance or are working on a release
diff --git a/tensorflow/contrib/makefile/build_all_ios.sh b/tensorflow/contrib/makefile/build_all_ios.sh
index 988e12b..a18df25 100755
--- a/tensorflow/contrib/makefile/build_all_ios.sh
+++ b/tensorflow/contrib/makefile/build_all_ios.sh
@@ -26,13 +26,16 @@
usage() {
echo "Usage: $(basename "$0") [-a:T]"
echo "-a [build_arch] build only for specified arch x86_64 [default=all]"
+ echo "-g [graph] optimize and selectively register ops only for this graph"
echo "-T only build tensorflow (dont download other deps etc)"
exit 1
}
-while getopts "a:T" opt_name; do
+DEFAULT_ARCH="i386 x86_64 armv7 armv7s arm64"
+while getopts "a:g:T" opt_name; do
case "$opt_name" in
a) BUILD_ARCH="${OPTARG}";;
+ g) OPTIMIZE_FOR_GRAPH="${OPTARG}";;
T) ONLY_MAKE_TENSORFLOW="true";;
*) usage;;
esac
@@ -42,7 +45,8 @@
# Make sure we're in the correct directory, at the root of the source tree.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
-cd ${SCRIPT_DIR}/../../../
+TOP_SRCDIR="${SCRIPT_DIR}/../../../"
+cd ${TOP_SRCDIR}
source "${SCRIPT_DIR}/build_helper.subr"
JOB_COUNT="${JOB_COUNT:-$(get_job_count)}"
@@ -56,6 +60,32 @@
export MACOSX_DEPLOYMENT_TARGET=$(sw_vers -productVersion)
fi
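+
+# Path to the bazel-built tool that generates ops_to_register.h, the header
+# listing the ops required by a given graph for selective registration.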
+PRNT_SLCTV_BIN="${TOP_SRCDIR}bazel-bin/tensorflow/python/tools/print_selective_registration_header"
+
+if [[ ! -z "${OPTIMIZE_FOR_GRAPH}" ]]; then
+ echo "Request to optimize for graph: ${OPTIMIZE_FOR_GRAPH}"
+  # Trim the set of registered ops to those used by the given graph.
+ if [ ! -f ${PRNT_SLCTV_BIN} ]; then
+    # Build tensorflow/python/tools:print_selective_registration_header with bazel.
+ echo "${PRNT_SLCTV_BIN} not found. Trying to build it"
+ cd ${TOP_SRCDIR}
+ bazel build --copt="-DUSE_GEMM_FOR_CONV" tensorflow/python/tools:print_selective_registration_header
+ if [ ! -f ${PRNT_SLCTV_BIN} ]; then
+ echo "Building print_selective_registration_header failed"
+ echo "You may want to build TensorFlow with: "
+ echo "./configure"
+ echo "bazel build --copt="-DUSE_GEMM_FOR_CONV" tensorflow/python/tools:print_selective_registration_header"
+ echo "and then run this script again"
+ exit 1
+ fi
+ else
+ echo "${PRNT_SLCTV_BIN} found. Using it"
+    ${PRNT_SLCTV_BIN} --graphs=${OPTIMIZE_FOR_GRAPH} > ${TOP_SRCDIR}/tensorflow/core/framework/ops_to_register.h
+  fi
+fi
+
if [[ "${ONLY_MAKE_TENSORFLOW}" != "true" ]]; then
# Remove any old files first.
make -f tensorflow/contrib/makefile/Makefile clean
@@ -64,8 +94,13 @@
# Pull down the required versions of the frameworks we need.
tensorflow/contrib/makefile/download_dependencies.sh
- # Compile protobuf for the target iOS device architectures.
- tensorflow/contrib/makefile/compile_ios_protobuf.sh
+ if [[ -z "${BUILD_ARCH}" ]]; then
+ # Compile protobuf for the target iOS device architectures.
+ tensorflow/contrib/makefile/compile_ios_protobuf.sh -a ${DEFAULT_ARCH}
+ else
+ # Compile protobuf for the target iOS device architectures.
+ tensorflow/contrib/makefile/compile_ios_protobuf.sh -a ${BUILD_ARCH}
+ fi
fi
# Compile nsync for the target iOS device architectures.
@@ -80,13 +115,24 @@
fi
export HOST_NSYNC_LIB TARGET_NSYNC_LIB
-if [[ -z "${BUILD_ARCH}" ]]; then
- # build the ios tensorflow libraries.
- tensorflow/contrib/makefile/compile_ios_tensorflow.sh -f "-O3" -h $HOST_NSYNC_LIB -n $TARGET_NSYNC_LIB
-else
+TF_CC_FLAGS="-O3"
+TF_SCRIPT_FLAGS="-h ${HOST_NSYNC_LIB} -n ${TARGET_NSYNC_LIB}"
+
+if [[ ! -z "${OPTIMIZE_FOR_GRAPH}" ]]; then
-  # arch specified so build just that
+  # graph specified, so enable selective registration of its ops
- tensorflow/contrib/makefile/compile_ios_tensorflow.sh -f "-O3" -a "${BUILD_ARCH}" -h $HOST_NSYNC_LIB -n $TARGET_NSYNC_LIB
+ TF_CC_FLAGS="${TF_CC_FLAGS} -DANDROID_TYPES=__ANDROID_TYPES_FULL__ -DSELECTIVE_REGISTRATION -DSUPPORT_SELECTIVE_REGISTRATION"
+ # The Makefile checks the env var to decide which ANDROID_TYPES to build
+ export ANDROID_TYPES="-D__ANDROID_TYPES_FULL__"
fi
+if [[ ! -z "${BUILD_ARCH}" ]]; then
+  # arch specified, so build only that architecture
+ TF_SCRIPT_FLAGS="${TF_SCRIPT_FLAGS} -a ${BUILD_ARCH}"
+fi
+
+# build the ios tensorflow libraries.
+echo "Building TensorFlow with flags: ${TF_SCRIPT_FLAGS} -f ${TF_CC_FLAGS}"
+tensorflow/contrib/makefile/compile_ios_tensorflow.sh ${TF_SCRIPT_FLAGS} -f "${TF_CC_FLAGS}"
+
# Creates a static universal library in
# tensorflow/contrib/makefile/gen/lib/libtensorflow-core.a
diff --git a/tensorflow/contrib/mpi_collectives/mpi_ops.cc b/tensorflow/contrib/mpi_collectives/mpi_ops.cc
deleted file mode 100644
index a051ab0..0000000
--- a/tensorflow/contrib/mpi_collectives/mpi_ops.cc
+++ /dev/null
@@ -1,1236 +0,0 @@
-/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#ifdef TENSORFLOW_USE_MPI
-
-#include <queue>
-#include <thread>
-#include <unordered_map>
-
-#include "tensorflow/core/framework/op.h"
-#include "tensorflow/core/framework/op_kernel.h"
-#include "tensorflow/core/framework/shape_inference.h"
-#include "tensorflow/core/framework/types.pb.h"
-#include "tensorflow/core/platform/mutex.h"
-
-#define EIGEN_USE_THREADS
-
-#if GOOGLE_CUDA
-#include <cuda_runtime.h>
-#include "tensorflow/stream_executor/stream.h"
-#endif
-
-#include "tensorflow/stream_executor/lib/statusor.h"
-
-#define OMPI_SKIP_MPICXX
-#include "third_party/mpi/mpi.h"
-#include "tensorflow/contrib/mpi_collectives/mpi_message.pb.h"
-#include "tensorflow/contrib/mpi_collectives/ring.h"
-
-/*
- * MPI Allreduce and Allgather Ops for TensorFlow.
- *
- * TensorFlow natively provides inter-device communication through send and
- * receive ops and inter-node communication through Distributed TensorFlow,
- * based on the same send and receive abstractions. These end up being
- * insufficient for synchronous data-parallel training on HPC clusters where
- * Infiniband or other high-speed interconnects are available. This module
- * implements MPI ops for allgather and allreduce, which do bandwidth-optimal
- * gathers and reductions and can take advantage of hardware-optimized
- * communication libraries through the MPI implementation.
- *
- * The primary logic of the allreduce and allgather are in RingAllgather() and
- * RingAllreduce(). The background thread which facilitates MPI operations is
- * run in BackgroundThreadLoop(). The provided MPI ops are:
- * – MPIInit:
- * Initialize MPI on a given device (CPU or GPU).
- * Should only be run on a single device in every process.
- * – MPISize:
- * Get the number of MPI processes in the global communicator.
- * – MPIRank:
- * Get the rank of the current MPI process in the global communicator.
- * – MPILocalRank:
- * Get the local rank of the current MPI process within its node.
- * – MPIAllreduce:
- * Perform an allreduce on a Tensor, returning the sum
- * across all MPI processes in the global communicator.
- * – MPIAllgather:
- * Perform an allgather on a Tensor, returning the concatenation of
- * the tensor on the first dimension across all MPI processes in the
- * global communicator.
- *
- */
-
-template <class T>
-using StatusOr = perftools::gputools::port::StatusOr<T>;
-
-using CPUDevice = Eigen::ThreadPoolDevice;
-using GPUDevice = Eigen::GpuDevice;
-
-namespace tensorflow {
-namespace contrib {
-namespace mpi {
-
-// Make sure template specializations are generated in the ring.cu.cc and the
-// ring.cc file, not in this file.
-extern template Status RingAllreduce<GPUDevice, int>(OpKernelContext*,
- const Tensor*, Tensor*,
- Tensor*);
-extern template Status RingAllreduce<GPUDevice, long long>(OpKernelContext*,
- const Tensor*,
- Tensor*, Tensor*);
-extern template Status RingAllreduce<GPUDevice, float>(OpKernelContext*,
- const Tensor*, Tensor*,
- Tensor*);
-extern template Status RingAllgather<GPUDevice, int>(OpKernelContext*,
- const Tensor*,
- const std::vector<size_t>&,
- Tensor*);
-extern template Status RingAllgather<GPUDevice, long long>(
- OpKernelContext*, const Tensor*, const std::vector<size_t>&, Tensor*);
-extern template Status RingAllgather<GPUDevice, float>(
- OpKernelContext*, const Tensor*, const std::vector<size_t>&, Tensor*);
-extern template Status RingAllreduce<CPUDevice, int>(OpKernelContext*,
- const Tensor*, Tensor*,
- Tensor*);
-extern template Status RingAllreduce<CPUDevice, long long>(OpKernelContext*,
- const Tensor*,
- Tensor*, Tensor*);
-extern template Status RingAllreduce<CPUDevice, float>(OpKernelContext*,
- const Tensor*, Tensor*,
- Tensor*);
-extern template Status RingAllgather<CPUDevice, int>(OpKernelContext*,
- const Tensor*,
- const std::vector<size_t>&,
- Tensor*);
-extern template Status RingAllgather<CPUDevice, long long>(
- OpKernelContext*, const Tensor*, const std::vector<size_t>&, Tensor*);
-extern template Status RingAllgather<CPUDevice, float>(
- OpKernelContext*, const Tensor*, const std::vector<size_t>&, Tensor*);
-
-namespace {
-
-// Return true if the templated type is GPUDevice, otherwise false.
-template <typename T>
-bool IsGPUDevice();
-template <>
-bool IsGPUDevice<GPUDevice>() {
- return true;
-};
-template <>
-bool IsGPUDevice<CPUDevice>() {
- return false;
-};
-
-// A callback to call after the MPI communication completes. Since the
-// allreduce and allgather ops are asynchronous, this callback is what resumes
-// computation after the reduction is completed.
-typedef std::function<void(StatusOr<Tensor>)> CommunicationDoneCallback;
-
-struct CollectiveOpRecord {
- // The rank performing this piece of the op
- int rank;
-
- // The name of the op/tensor to be reduced
- std::string name;
-
- // The op's kernel context
- OpKernelContext* context;
-
- // Data type of the op
- DataType dtype;
-
- // The input tensor
- const Tensor* in_t;
-
- // Allgather: Vector of per-rank first-dimension sizes
- std::vector<size_t> sizes_vec;
-
- // The temp tensor for intermediate results
- Tensor temp_t;
-
- // The output tensor
- Tensor* out_t;
-
- // Whether to run this op on the gpu
- bool on_gpu;
-
- // The callback to call after the op has completed
- CommunicationDoneCallback callback;
-};
-
-// Table storing Tensors to be reduced, keyed by unique name.
-// This table contains everything necessary to do the reduction
-typedef std::unordered_map<std::string, CollectiveOpRecord> TensorTable;
-
-// Table for storing Tensor metadata on rank zero. This is used for error
-// checking and size calculations, as well as determining when a reduction is
-// ready to be done (when all nodes are ready to do it).
-typedef std::unordered_map<std::string, std::vector<MPIRequest> > MessageTable;
-
-// The global state required for the MPI ops.
-//
-// MPI is a library that stores a lot of global per-program state and often
-// requires running on a single thread. As a result, we have to have a single
-// background thread responsible for all MPI operations, and communicate with
-// that background thread through global state.
-struct MPIGlobalState {
- // An atomic boolean which is set to true when MPI is initialized.
- // This ensures that MPI_Init is never called twice.
- std::atomic_flag initialized_flag = ATOMIC_FLAG_INIT;
-
- // Condition variable to wait for initialization
- condition_variable cv;
-
- // Whether MPI_Init has been completed on the background thread.
- bool initialization_done = false;
-
- // Whether MPI_Init succeeded on the background thread.
- Status init_status;
-
- // A mutex that needs to be used whenever MPI operations touch
- // shared structures.
- mutex mu;
-
- // Tensors waiting to be allreduced or allgathered.
- TensorTable tensor_table;
-
- // Queue of MPI requests waiting to be sent to the coordinator node.
- std::queue<MPIRequest> message_queue;
-
- // Background thread running MPI communication.
- std::thread background_thread;
-
- // Whether the background thread should shutdown.
- bool shut_down = false;
-
- // Only exists on the coordinator node (rank zero). Maintains a count of
- // how many nodes are ready to allreduce every tensor (keyed by tensor
- // name).
- std::unique_ptr<MessageTable> message_table;
-
- // The MPI rank, local rank, and size.
- int rank = 0;
- int local_rank = 0;
- int size = 1;
-
- // The device that MPI was initialized on. (-1 for no GPU)
- int device = -1;
-
- // The CUDA stream used for data transfers and within-allreduce operations.
- // A naive implementation would use the TensorFlow StreamExecutor CUDA
- // stream. However, the allreduce and allgather require doing memory copies
- // and kernel executions (for accumulation of values on the GPU). However,
- // the subsequent operations must wait for those operations to complete,
- // otherwise MPI (which uses its own stream internally) will begin the data
- // transfers before the CUDA calls are complete. In order to wait for those
- // CUDA operations, if we were using the TensorFlow stream, we would have
- // to synchronize that stream; however, other TensorFlow threads may be
- // submitting more work to that stream, so synchronizing on it can cause
- // the allreduce to be delayed, waiting for compute totally unrelated to it
- // in other parts of the graph. Overlaying memory transfers and compute
- // during backpropagation is crucial for good performance, so we cannot use
- // the TensorFlow stream, and must use our own stream.
-#if GOOGLE_CUDA
- cudaStream_t stream;
- std::atomic_flag stream_created_flag = ATOMIC_FLAG_INIT;
-#endif
-
- ~MPIGlobalState() {
- // Make sure that the destructor of the background thread is safe to
- // call. If a thread is still joinable (not detached or complete) its
- // destructor cannot be called.
- if (background_thread.joinable()) {
- shut_down = true;
- background_thread.join();
- }
- }
-};
-
-// All the MPI state that must be stored globally per-process.
-static MPIGlobalState mpi_global;
-
-// For clarify in argument lists.
-#define RANK_ZERO 0
-
-// A tag used for all coordinator messaging.
-#define TAG_NOTIFY 1
-
-// Store the MPIRequest for a name, and return whether the total count of
-// MPIRequests for that tensor is now equal to the MPI size (and thus we are
-// ready to reduce the tensor).
-bool IncrementTensorCount(std::unique_ptr<MessageTable>& message_table,
- MPIRequest msg, int mpi_size) {
- auto name = msg.tensor_name();
- auto table_iter = message_table->find(name);
- if (table_iter == message_table->end()) {
- message_table->emplace(name, std::vector<MPIRequest>({msg}));
- table_iter = message_table->find(name);
- } else {
- table_iter->second.push_back(msg);
- }
-
- int count = table_iter->second.size();
- return count == mpi_size;
-}
-
-// Once a tensor is ready to be reduced, the coordinator sends an MPIResponse
-// instructing all ranks to start the reduction to all ranks. The MPIResponse
-// also contains error messages in case the submitted MPIRequests were not
-// valid (for example, contained mismatched shapes or types).
-//
-// Constructing the MPIResponse, thus, requires a whole lot of error checking.
-MPIResponse ConstructMPIResponse(std::unique_ptr<MessageTable>& message_table,
- std::string name) {
- bool error = false;
- auto it = message_table->find(name);
- assert(it != message_table->end());
-
- std::vector<MPIRequest> requests = it->second;
- assert(requests.size() > 0);
-
- std::ostringstream error_message_stream;
-
- // Check that all data types being reduced or gathered are identical
- auto data_type = requests[0].tensor_type();
- for (unsigned int i = 1; i < requests.size(); i++) {
- auto request_type = requests[i].tensor_type();
- if (data_type != request_type) {
- error = true;
- error_message_stream << "Mismatched data types: One rank had type "
- << DataType_Name(data_type)
- << ", but another rank had type "
- << DataType_Name(request_type) << ".";
- break;
- }
- }
-
- // Check that all requested operations are the same
- auto message_type = requests[0].request_type();
- for (unsigned int i = 1; i < requests.size(); i++) {
- if (error) {
- break;
- }
-
- auto request_type = requests[i].request_type();
- if (message_type != request_type) {
- error = true;
- error_message_stream << "Mismatched MPI operations: One rank did an "
- << message_type << ", but another rank did an "
- << request_type << ".";
- break;
- }
- }
-
- // If we are doing an allreduce, check that all tensor shapes
- // are identical
- if (message_type == MPIRequest::ALLREDUCE) {
- TensorShape tensor_shape = requests[0].tensor_shape();
- for (unsigned int i = 1; i < requests.size(); i++) {
- if (error) {
- break;
- }
-
- TensorShape request_shape = requests[i].tensor_shape();
- if (tensor_shape != request_shape) {
- error = true;
- error_message_stream << "Mismatched allreduce tensor shapes: "
- << "One rank reduced a tensor of shape "
- << tensor_shape.DebugString()
- << ", but another rank sent a tensor of shape "
- << request_shape.DebugString() << ".";
- break;
- }
- }
- }
-
- // If we are doing an allgather, make sure all but the first dimension are
- // the same. The first dimension may be different and the output tensor is
- // the sum of the first dimension. Collect the sizes by rank.
- if (message_type == MPIRequest::ALLGATHER) {
- TensorShape tensor_shape = requests[0].tensor_shape();
-
- if (tensor_shape.dims() == 0) {
- error = true;
- error_message_stream << "Rank zero tried to gather a rank-zero tensor.";
- }
-
- for (unsigned int i = 1; i < requests.size(); i++) {
- if (error) {
- break;
- }
-
- TensorShape request_shape = requests[i].tensor_shape();
- if (tensor_shape.dims() != request_shape.dims()) {
- error = true;
- error_message_stream << "Mismatched allgather tensor shapes: "
- << "One rank gathered a tensor of rank "
- << tensor_shape.dims()
- << ", but another rank sent a tensor of rank "
- << request_shape.dims() << ".";
- break;
- }
-
- for (unsigned int dim = 1; dim < tensor_shape.dims(); dim++) {
- if (tensor_shape.dim_size(dim) != request_shape.dim_size(dim)) {
- error = true;
- error_message_stream
- << "Mismatched allgather tensor shapes: "
- << "One rank gathered a tensor with dimension " << dim
- << " equal to " << tensor_shape.dim_size(dim)
- << ", but another rank sent a tensor with dimension " << dim
- << " equal to " << request_shape.dim_size(dim) << ".";
- break;
- }
- }
- }
- }
-
- MPIResponse response;
- response.set_tensor_name(name);
- if (error) {
- std::string error_message = error_message_stream.str();
- response.set_response_type(MPIResponse::ERROR);
- response.set_error_message(error_message);
- } else {
- auto response_type = MPIResponse::ERROR;
- if (message_type == MPIRequest::ALLREDUCE) {
- response_type = MPIResponse::ALLREDUCE;
- } else {
- response_type = MPIResponse::ALLGATHER;
- }
- response.set_response_type(response_type);
- }
-
- // Clear all queued up requests for this name. They are now taken care of
- // by the constructed MPI response.
- message_table->erase(it);
-
- return response;
-}
-
-// Process an MPIResponse by doing a reduction, a gather, or raising an error.
-void PerformCollectiveOp(TensorTable& tensor_table, MPIResponse response) {
- OpKernelContext* context;
- const Tensor* input_tensor;
- std::vector<size_t> sizes_vec;
- Tensor temp_tensor;
- Tensor* output_tensor;
- CommunicationDoneCallback callback;
- bool on_gpu;
- {
- // Lock on the tensor table.
- mutex_lock guard(mpi_global.mu);
-
- // We should never fail at finding this key in the tensor table.
- auto name = response.tensor_name();
- auto iter = tensor_table.find(name);
- assert(iter != tensor_table.end());
-
- assert(response.response_type() == MPIResponse::ALLREDUCE ||
- response.response_type() == MPIResponse::ALLGATHER ||
- response.response_type() == MPIResponse::ERROR);
-
- CollectiveOpRecord record = iter->second;
- context = record.context;
- input_tensor = record.in_t;
- sizes_vec = record.sizes_vec;
- temp_tensor = record.temp_t;
- output_tensor = record.out_t;
- on_gpu = record.on_gpu;
- callback = record.callback;
-
- // Clear the tensor table of this tensor and its callbacks; the rest of
- // this function takes care of it.
- tensor_table.erase(iter);
- }
-
- // Use CPUDevice instead of GPUDevice if no CUDA, to ensure we don't
- // link to non-existent symbols.
-#if GOOGLE_CUDA
-#define GPU_DEVICE_IF_CUDA GPUDevice
-#else
-#define GPU_DEVICE_IF_CUDA CPUDevice
-#endif
-
- Status status;
- auto dtype = input_tensor->dtype();
- if (response.response_type() == MPIResponse::ALLGATHER) {
- if (dtype == DT_FLOAT) {
- status = on_gpu ? RingAllgather<GPU_DEVICE_IF_CUDA, float>(
- context, input_tensor, sizes_vec, output_tensor)
- : RingAllgather<CPUDevice, float>(
- context, input_tensor, sizes_vec, output_tensor);
- } else if (dtype == DT_INT32) {
- status = on_gpu ? RingAllgather<GPU_DEVICE_IF_CUDA, int>(
- context, input_tensor, sizes_vec, output_tensor)
- : RingAllgather<CPUDevice, int>(context, input_tensor,
- sizes_vec, output_tensor);
- } else if (dtype == DT_INT64) {
- status = on_gpu ? RingAllgather<GPU_DEVICE_IF_CUDA, long long>(
- context, input_tensor, sizes_vec, output_tensor)
- : RingAllgather<CPUDevice, long long>(
- context, input_tensor, sizes_vec, output_tensor);
- } else {
- status = errors::Unknown("Invalid tensor type for MPI allgather.");
- }
- } else if (response.response_type() == MPIResponse::ALLREDUCE) {
- if (dtype == DT_FLOAT) {
- status = on_gpu ? RingAllreduce<GPU_DEVICE_IF_CUDA, float>(
- context, input_tensor, &temp_tensor, output_tensor)
- : RingAllreduce<CPUDevice, float>(
- context, input_tensor, &temp_tensor, output_tensor);
- } else if (dtype == DT_INT32) {
- status = on_gpu ? RingAllreduce<GPU_DEVICE_IF_CUDA, int>(
- context, input_tensor, &temp_tensor, output_tensor)
- : RingAllreduce<CPUDevice, int>(
- context, input_tensor, &temp_tensor, output_tensor);
- } else if (dtype == DT_INT64) {
- status = on_gpu ? RingAllreduce<GPU_DEVICE_IF_CUDA, long long>(
- context, input_tensor, &temp_tensor, output_tensor)
- : RingAllreduce<CPUDevice, long long>(
- context, input_tensor, &temp_tensor, output_tensor);
- } else {
- status = errors::Unknown("Invalid tensor type for MPI allreduce.");
- }
- } else if (response.response_type() == MPIResponse::ERROR) {
- status = errors::FailedPrecondition(response.error_message());
- }
-
- if (status.ok()) {
- callback(StatusOr<Tensor>(*output_tensor));
- } else {
- callback(StatusOr<Tensor>(status));
- }
-}
-
-// The MPI background thread loop coordinates all the MPI processes and the
-// tensor reductions. The design of the communicator mechanism is limited by a
-// few considerations:
-//
-// 1. Some MPI implementations require all MPI calls to happen from a
-// single thread. Since TensorFlow may use several threads for graph
-// processing, this means we must have our own dedicated thread for
-// dealing with MPI.
-// 2. We want to gracefully handle errors, when MPI processes do not
-// properly agree upon what should happen (such as mismatched types or
-// shapes). To do so requires the MPI processes to know about the shapes
-// and types of the relevant tensors on the other processes.
-// 3. The MPI reductions and gathers should be able to happen in parallel
-// with other ongoing operations. Since MPI uses an internal
-// (inaccessible) GPU stream separate from the TF GPUDevice streams, we
-// cannot explicitly synchronize memcpys or kernels with it. As a result,
-// MPIAllreduce and MPIAllgather must be AsyncOpKernels to ensure proper
-// ordering of memcpys and kernels with respect to TF streams.
-// 4. NOTE: We cannot guarantee that all the MPI processes reduce their
-// tensors in the same order. Thus, there must be a way to ensure the
-// reduction memcpys and kernels occur for correct tensors across all
-// ranks at the same time. We choose to use a coordinator (rank ID 0) to
-// gather and trigger the reduction operations that are ready to execute.
-//
-// The coordinator currently follows a master-worker paradigm. Rank zero acts
-// as the master (the "coordinator"), whereas all other ranks are simply
-// workers. Each rank runs its own background thread which progresses in ticks.
-// In each tick, the following actions happen:
-//
-// a) The workers send any available MPIRequests to the coordinator. These
-// MPIRequests indicate what the worker would like to do (i.e. which
-// tensor they would like to gather or reduce, as well as their shape and
-// type). They repeat this for every tensor that they would like to
-// operate on after that tensor's collective op has executed ComputeAsync.
-//
-// b) The workers send an empty "DONE" message to the coordinator to
-// indicate that there are no more tensors they wish to operate on.
-//
-// c) The coordinator receives the MPIRequests from the workers, as well
-// as from its own TensorFlow ops, and stores them in a request table. The
-// coordinator continues to receive MPIRequest messages until it has
-// received MPI_SIZE number of empty "DONE" messages.
-//
-// d) The coordinator finds all tensors that are ready to be reduced,
-// gathered, or all operations that result in an error. For each of those,
-// it sends an MPIResponse to all the workers. When no more MPIResponses
-// are available, it sends a "DONE" response to the workers. If the
-// process is being shutdown, it instead sends a "SHUTDOWN" response.
-//
-// e) The workers listen for MPIResponse messages, processing each one by
-// doing the required reduce or gather, until they receive a "DONE"
-// response from the coordinator. At that point, the tick ends.
-// If instead of "DONE" they receive "SHUTDOWN", they exit their
-// background loop.
-// TODO: Use the global mpi_global state variable instead of a local one
-void BackgroundThreadLoop() {
-#if GOOGLE_CUDA
- // Set the device, so that this thread uses the same GPU context as the
- // calling thread.
- // TODO: Ensure that this is operating correctly. The background thread
- // needs to be able to control all GPUs that the rank has access to, and
- // might be more than 1 GPU. Tensors could be resident in any of the
- // GPUs, so the background thread's accumulate and copy kernels might need
- // to correctly set the device and it might be necessary for the background
- // thread to manage multiple streams.
- cudaSetDevice(mpi_global.device);
- cudaStreamCreate(&mpi_global.stream);
-#endif
-
- // Initialize MPI. This must happen on the background thread, since not all
- // MPI implementations support being called from multiple threads.
- auto init_result = MPI_Init(NULL, NULL);
- if (init_result != MPI_SUCCESS) {
- mpi_global.init_status =
- errors::Unknown("Could not initialize MPI; MPI_Init() failed.");
- mpi_global.initialization_done = true;
- mpi_global.cv.notify_all();
- return;
- } else {
- mpi_global.init_status = Status::OK();
- }
-
- // Get MPI rank to determine if we are rank zero.
- int rank;
- MPI_Comm_rank(MPI_COMM_WORLD, &rank);
- bool is_coordinator = rank == 0;
-
- // Get MPI size to determine how many tensors to wait for before reducing.
- int size;
- MPI_Comm_size(MPI_COMM_WORLD, &size);
-
- // Determine local rank by querying the local communicator.
- MPI_Comm local_comm;
- MPI_Comm_split_type(MPI_COMM_WORLD, MPI_COMM_TYPE_SHARED, 0, MPI_INFO_NULL,
- &local_comm);
- int local_rank;
- MPI_Comm_rank(local_comm, &local_rank);
-
- mpi_global.rank = rank;
- mpi_global.local_rank = local_rank;
- mpi_global.size = size;
- mpi_global.initialization_done = true;
-
- // Notify calling thread that initialization is complete
- mpi_global.cv.notify_all();
-
- // TODO: MOVE MESSAGE TABLE INITIALIZATION TO LIBRARY LOAD!
- // Initialize the tensor count table. No tensors are available yet.
- if (is_coordinator) {
- mpi_global.message_table =
- std::unique_ptr<MessageTable>(new MessageTable());
- }
-
- // The coordinator sends a SHUTDOWN message to trigger shutdown.
- bool should_shut_down = false;
- do {
- // TODO: Eliminate the need for thread sleep by making all activity
- // depend on other activity (e.g. condition or MPI waits).
- std::this_thread::sleep_for(std::chrono::milliseconds(1));
-
- // Copy the data structures from global state under this lock.
- // However, don't keep the lock for the rest of the loop, so that
- // enqueued stream callbacks can continue.
- std::queue<MPIRequest> message_queue;
- {
- mutex_lock guard(mpi_global.mu);
- while (!mpi_global.message_queue.empty()) {
- MPIRequest message = mpi_global.message_queue.front();
- mpi_global.message_queue.pop();
- message_queue.push(message);
- }
- }
-
- // Collect all tensors that are ready to be reduced. Record them in the
- // tensor count table (rank zero) or send them to rank zero to be
- // recorded (everyone else).
- std::vector<std::string> ready_to_reduce;
- while (!message_queue.empty()) {
- // Pop the first available message message
- MPIRequest message = message_queue.front();
- message_queue.pop();
-
- if (is_coordinator) {
- bool reduce =
- IncrementTensorCount(mpi_global.message_table, message, size);
- if (reduce) {
- ready_to_reduce.push_back(message.tensor_name());
- }
- } else {
- std::string encoded_message;
- message.SerializeToString(&encoded_message);
- MPI_Send(encoded_message.c_str(), encoded_message.length() + 1,
- MPI_BYTE, RANK_ZERO, TAG_NOTIFY, MPI_COMM_WORLD);
- }
- }
-
- // Rank zero has put all its own tensors in the tensor count table.
- // Now, it should count all the tensors that are coming from other
- // ranks at this tick. It should keep getting tensors until it gets a
- // DONE message from all the other ranks.
- if (is_coordinator) {
- // Count of DONE messages. Keep receiving messages until the number
- // of messages is equal to the number of processes. Initialize to
- // one since the coordinator is effectively done.
- int completed_ranks = 1;
- while (completed_ranks != size) {
- MPI_Status status;
- MPI_Probe(MPI_ANY_SOURCE, TAG_NOTIFY, MPI_COMM_WORLD, &status);
-
- // Find number of characters in message (including zero byte).
- int source_rank = status.MPI_SOURCE;
- int msg_length;
- MPI_Get_count(&status, MPI_BYTE, &msg_length);
-
- // If the length is zero, this is a DONE message.
- if (msg_length == 0) {
- completed_ranks++;
- MPI_Recv(NULL, 0, MPI_BYTE, source_rank, TAG_NOTIFY, MPI_COMM_WORLD,
- &status);
- continue;
- }
-
- // Get tensor name from MPI into an std::string.
- char* buffer = new char[msg_length];
- MPI_Recv(buffer, msg_length, MPI_BYTE, source_rank, TAG_NOTIFY,
- MPI_COMM_WORLD, &status);
- std::string received_data(buffer);
- delete[] buffer;
-
- MPIRequest received_message;
- received_message.ParseFromString(received_data);
- auto received_name = received_message.tensor_name();
-
- bool reduce = IncrementTensorCount(mpi_global.message_table,
- received_message, size);
- if (reduce) {
- ready_to_reduce.push_back(received_name);
- }
- }
-
- // At this point, rank zero should have a fully updated tensor
- // count table and should know all the tensors that need to be
- // reduced or gathered, and everyone else should have sent all
- // their information to rank zero. We can now do reductions and
- // gathers; rank zero will choose which ones and in what order,
- // and will notify the other ranks before doing each reduction.
- for (int i = 0; i < ready_to_reduce.size(); i++) {
- // Notify all nodes which tensor we'd like to reduce now
- auto name = ready_to_reduce[i];
- MPIResponse response =
- ConstructMPIResponse(mpi_global.message_table, name);
-
- std::string encoded_response;
- response.SerializeToString(&encoded_response);
- for (int r = 1; r < size; r++) {
- MPI_Send(encoded_response.c_str(), encoded_response.length() + 1,
- MPI_BYTE, r, TAG_NOTIFY, MPI_COMM_WORLD);
- }
-
- // Perform the reduction. All nodes should end up performing
- // the same reduction.
- PerformCollectiveOp(mpi_global.tensor_table, response);
- }
-
- // Notify all nodes that we are done with the reductions for this
- // tick.
- MPIResponse done_response;
- should_shut_down = mpi_global.shut_down;
- done_response.set_response_type(
- mpi_global.shut_down ? MPIResponse::SHUTDOWN : MPIResponse::DONE);
- std::string encoded_response;
- done_response.SerializeToString(&encoded_response);
- for (int r = 1; r < size; r++) {
- MPI_Send(encoded_response.c_str(), encoded_response.length() + 1,
- MPI_BYTE, r, TAG_NOTIFY, MPI_COMM_WORLD);
- }
- } else {
- // Notify the coordinator that this node is done sending messages.
- // A DONE message is encoded as a zero-length message.
- MPI_Send(NULL, 0, MPI_BYTE, RANK_ZERO, TAG_NOTIFY, MPI_COMM_WORLD);
-
- // Receive names for tensors to reduce from rank zero. Once we
- // receive a empty DONE message, stop waiting for more names.
- while (true) {
- MPI_Status status;
- MPI_Probe(0, TAG_NOTIFY, MPI_COMM_WORLD, &status);
-
- // Find number of characters in message (including zero byte).
- int msg_length;
- MPI_Get_count(&status, MPI_BYTE, &msg_length);
-
- // Get tensor name from MPI into an std::string.
- char* buffer = new char[msg_length];
- MPI_Recv(buffer, msg_length, MPI_BYTE, 0, TAG_NOTIFY, MPI_COMM_WORLD,
- &status);
- std::string received_message(buffer);
- delete[] buffer;
-
- MPIResponse response;
- response.ParseFromString(received_message);
- if (response.response_type() == MPIResponse::DONE) {
- // No more messages this tick
- break;
- } else if (response.response_type() == MPIResponse::SHUTDOWN) {
- // No more messages this tick, and the background thread
- // should shut down
- should_shut_down = true;
- break;
- } else {
- // Process the current message
- PerformCollectiveOp(mpi_global.tensor_table, response);
- }
- }
- }
- } while (!should_shut_down);
-
- MPI_Finalize();
-}
-
-// Initialize MPI and start the MPI background thread. Ensure that this is
-// only done once no matter how many times this function is called.
-Status InitializeMPIOnce(bool gpu) {
- // Ensure MPI is only initialized once.
- if (mpi_global.initialized_flag.test_and_set()) return mpi_global.init_status;
-
- mpi_global.device = -1;
-#if GOOGLE_CUDA
- if (gpu) {
- cudaGetDevice(&mpi_global.device);
- }
-#endif
-
- // Start the MPI background thread, which assumes MPI is initialized
- // TODO: Change this to a Tensorflow thread
- mpi_global.background_thread = std::thread(BackgroundThreadLoop);
-
- // Wait to ensure that the background thread has finished initializing MPI
- mutex_lock guard(mpi_global.mu);
- mpi_global.cv.wait(guard);
- if (!mpi_global.initialization_done) {
- mpi_global.init_status =
- errors::Unknown("Failed to wait for MPI initialization.");
- }
-
- return mpi_global.init_status;
-}
-
-// Check that MPI is initialized.
-Status IsMPIInitialized() {
- if (!mpi_global.initialization_done) {
- return errors::FailedPrecondition(
- "MPI has not been initialized; use tf.contrib.mpi.Session.");
- }
- return Status::OK();
-}
-
-// This function (called from the callback set up in MPIAll*Op::ComputeAsync)
-// only adds the op's record into the local op queue (to track the op's
-// progress), and sends a message to the coordinator indicating that this rank
-// is ready to begin. The MPI background thread will handle the MPI message.
-void EnqueueTensorCollective(CollectiveOpRecord record,
- MPIRequest::RequestType rtype) {
- const Tensor* input_tensor = record.in_t;
- MPIRequest message;
- message.set_request_rank(record.rank);
- message.set_tensor_name(record.name);
- message.set_tensor_type(record.dtype);
- message.set_request_type(rtype);
- input_tensor->shape().AsProto(message.mutable_tensor_shape());
-
- mutex_lock guard(mpi_global.mu);
- mpi_global.tensor_table.emplace(record.name, record);
- mpi_global.message_queue.push(message);
-}
-
-} // namespace
-
-#if GOOGLE_CUDA
-cudaStream_t CudaStreamForMPI() { return mpi_global.stream; }
-#endif
-
-// Op to initialize MPI in the current process. The settings used in the
-// configuration are the same that must be used for all future MPI ops.
-template <typename Device>
-class MPIInitOp : public OpKernel {
- public:
- explicit MPIInitOp(OpKernelConstruction* context) : OpKernel(context) {}
-
- void Compute(OpKernelContext* context) override {
- bool on_gpu = IsGPUDevice<Device>();
- OP_REQUIRES_OK(context, InitializeMPIOnce(on_gpu));
- }
-};
-
-REGISTER_KERNEL_BUILDER(Name("MPIInit").Device(DEVICE_CPU),
- MPIInitOp<CPUDevice>);
-#if GOOGLE_CUDA
-REGISTER_KERNEL_BUILDER(Name("MPIInit").Device(DEVICE_GPU),
- MPIInitOp<GPUDevice>);
-#endif
-
-REGISTER_OP("MPIInit").Doc(R"doc(
-Initialize MPI for the current process.
-
-If this is run on a GPU, then that GPU must be used for all future MPI
-operations. If it is run on CPU, then all future MPI operations must also
-run on CPU.
-)doc");
-
-// Op to get the current MPI Size.
-template <typename Device>
-class MPISizeOp : public OpKernel {
- public:
- explicit MPISizeOp(OpKernelConstruction* context) : OpKernel(context) {}
-
- void Compute(OpKernelContext* context) override {
- OP_REQUIRES_OK(context, IsMPIInitialized());
-
- // Write integer to output tensor
- Tensor* output;
- OP_REQUIRES_OK(context,
- context->allocate_output(0, TensorShape({}), &output));
-
- auto flat = output->flat<int>();
- flat(0) = mpi_global.size;
- }
-};
-
-REGISTER_KERNEL_BUILDER(Name("MPISize").Device(DEVICE_CPU),
- MPISizeOp<CPUDevice>);
-#if GOOGLE_CUDA
-REGISTER_KERNEL_BUILDER(Name("MPISize").Device(DEVICE_GPU).HostMemory("size"),
- MPISizeOp<GPUDevice>);
-#endif
-
-REGISTER_OP("MPISize")
- .Output("size: int32")
- .SetShapeFn([](shape_inference::InferenceContext* c) {
- c->set_output(0, c->Scalar());
- return Status::OK();
- })
- .Doc(R"doc(
-Returns the number of running MPI processes.
-
-More precisely, returns the number of MPI processes in the group associated
-with the MPI_COMM_WORLD communicator.
-
-size: Size of the MPI group.
-)doc");
-
-// Op to get the current MPI Rank.
-template <typename Device>
-class MPIRankOp : public OpKernel {
- public:
- explicit MPIRankOp(OpKernelConstruction* context) : OpKernel(context) {}
-
- void Compute(OpKernelContext* context) override {
- OP_REQUIRES_OK(context, IsMPIInitialized());
-
- // Write integer to output tensor
- Tensor* output;
- OP_REQUIRES_OK(context,
- context->allocate_output(0, TensorShape({}), &output));
-
- auto flat = output->flat<int>();
- flat(0) = mpi_global.rank;
- }
-};
-
-REGISTER_KERNEL_BUILDER(Name("MPIRank").Device(DEVICE_CPU),
- MPIRankOp<CPUDevice>);
-#if GOOGLE_CUDA
-REGISTER_KERNEL_BUILDER(Name("MPIRank").Device(DEVICE_GPU).HostMemory("rank"),
- MPIRankOp<GPUDevice>);
-#endif
-
-REGISTER_OP("MPIRank")
- .Output("rank: int32")
- .SetShapeFn([](shape_inference::InferenceContext* c) {
- c->set_output(0, c->Scalar());
- return Status::OK();
- })
- .Doc(R"doc(
-Returns the index of the current process in the MPI group.
-
-More precisely, returns the rank of the calling process in the MPI_COMM_WORLD
-communicator.
-
-rank: Rank of the calling process.
-)doc");
-
-// Op to get the current local MPI Rank.
-template <typename Device>
-class MPILocalRankOp : public OpKernel {
- public:
- explicit MPILocalRankOp(OpKernelConstruction* context) : OpKernel(context) {}
-
- void Compute(OpKernelContext* context) override {
- OP_REQUIRES_OK(context, IsMPIInitialized());
-
- // Write integer to output tensor
- Tensor* output;
- OP_REQUIRES_OK(context,
- context->allocate_output(0, TensorShape({}), &output));
-
- auto flat = output->flat<int>();
- flat(0) = mpi_global.local_rank;
- }
-};
-
-REGISTER_KERNEL_BUILDER(Name("MPILocalRank").Device(DEVICE_CPU),
- MPILocalRankOp<CPUDevice>);
-#if GOOGLE_CUDA
-REGISTER_KERNEL_BUILDER(
- Name("MPILocalRank").Device(DEVICE_GPU).HostMemory("rank"),
- MPILocalRankOp<GPUDevice>);
-#endif
-
-REGISTER_OP("MPILocalRank")
- .Output("rank: int32")
- .SetShapeFn([](shape_inference::InferenceContext* c) {
- c->set_output(0, c->Scalar());
- return Status::OK();
- })
- .Doc(R"doc(
-Returns the index of the current process in the node it is on.
-
-More precisely, returns the rank of the calling process in communicator that
-only spans the MPI processes running on that node.
-
-rank: Rank of the calling process on the node it is on.
-)doc");
-
-template <typename Device>
-class MPIAllreduceOp : public AsyncOpKernel {
- public:
- explicit MPIAllreduceOp(OpKernelConstruction* context)
- : AsyncOpKernel(context) {}
-
- // Although this op is handled asynchronously, the ComputeAsync call is
- // very inexpensive. It only sets up a CollectiveOpRecord and places it
- // in the table for the background thread to handle. Thus, we do not need
- // a TF pool thread to perform the op.
- bool IsExpensive() override { return false; }
-
- void ComputeAsync(OpKernelContext* context, DoneCallback done) override {
- OP_REQUIRES_OK_ASYNC(context, IsMPIInitialized(), done);
- const Tensor* input_tensor = &context->input(0);
- Tensor* output_tensor;
- OP_REQUIRES_OK_ASYNC(
- context,
- context->allocate_output(0, input_tensor->shape(), &output_tensor),
- done);
-
- // Record allocated on stack so op can fail without memory leak
- CollectiveOpRecord record;
- record.name = name();
- record.context = context;
- record.in_t = input_tensor;
- record.out_t = output_tensor;
- record.on_gpu = IsGPUDevice<Device>();
- record.dtype = input_tensor->dtype();
-
- const size_t temp_size =
- (input_tensor->NumElements() + mpi_global.size - 1) / mpi_global.size;
- TensorShape temp_shape;
- temp_shape.AddDim(temp_size);
- OP_REQUIRES_OK_ASYNC(context,
- context->allocate_temp(input_tensor->dtype(),
- temp_shape, &record.temp_t),
- done);
-
- auto allreduce_done_callback = [done, context](StatusOr<Tensor> status) {
- context->SetStatus(status.status());
- done();
- };
- record.callback = allreduce_done_callback;
-
- auto allreduce_launch_callback = [record] {
- EnqueueTensorCollective(record, MPIRequest::ALLREDUCE);
- };
-
- // If we are on a CPU, our device context will be null and we can't
- // get a stream to enqueue this on. On a CPU this op is called when the
- // data is already available, so we can just immediately do the
- // allreduce; we don't have to wait for the data to get populated.
-#if GOOGLE_CUDA
- auto device_context = context->op_device_context();
- if (device_context == nullptr) {
- allreduce_launch_callback();
- } else {
- auto stream = device_context->stream();
- stream->ThenDoHostCallback(allreduce_launch_callback);
- }
-#else
- allreduce_launch_callback();
-#endif
- }
-};
-
-REGISTER_KERNEL_BUILDER(Name("MPIAllreduce").Device(DEVICE_CPU),
- MPIAllreduceOp<CPUDevice>);
-#if GOOGLE_CUDA
-REGISTER_KERNEL_BUILDER(Name("MPIAllreduce").Device(DEVICE_GPU),
- MPIAllreduceOp<GPUDevice>);
-#endif
-
-REGISTER_OP("MPIAllreduce")
- .Attr("T: {int32, int64, float32}")
- .Input("tensor: T")
- .Output("sum: T")
- .SetShapeFn([](shape_inference::InferenceContext* c) {
- c->set_output(0, c->input(0));
- return Status::OK();
- })
- .Doc(R"doc(
-Perform an MPI Allreduce on a tensor. All other processes that do a reduction
-on a tensor with the same name must have the same dimension for that tensor.
-Tensors are reduced with other tensors that have the same node name for the
-allreduce.
-
-Arguments
- tensor: A tensor to reduce.
-
-Output
- sum: A tensor with the same shape as `tensor`, summed across all
- MPI processes.
-)doc");
-
-template <typename Device>
-class MPIAllgatherOp : public AsyncOpKernel {
- public:
- explicit MPIAllgatherOp(OpKernelConstruction* context)
- : AsyncOpKernel(context) {}
-
- // Although this op is handled asynchronously, the ComputeAsync call is
- // very inexpensive. It only sets up a CollectiveOpRecord and places it
- // in the table for the background thread to handle. Thus, we do not need
- // a TF pool thread to perform the op.
- bool IsExpensive() override { return false; }
-
- void ComputeAsync(OpKernelContext* context, DoneCallback done) override {
- OP_REQUIRES_OK_ASYNC(context, IsMPIInitialized(), done);
- const Tensor* input_tensor = &context->input(0);
- const Tensor* sizing_tensor = &context->input(1);
-
- // Record allocated on stack so op can fail without memory leak
- CollectiveOpRecord record;
- record.name = name();
- record.context = context;
- record.in_t = input_tensor;
- record.on_gpu = IsGPUDevice<Device>();
-
- // Construct the output size from the sizing tensor
- size_t output_first_dim = 0;
- if (sizing_tensor->shape().dims() == 0) {
- // 0-dim sizing_tensor implies that the op is just gathering
- // a single element from each rank
- output_first_dim = mpi_global.size;
- for (int i = 0; i < mpi_global.size; i++) {
- record.sizes_vec.push_back(1);
- }
- } else {
- // Collect the total output tensor sizing from the sizing tensor
- // NOTE: The sizing tensor is forced to be placed on the CPU by
- // declaring the input as HostMemory, so it is valid to read it here.
- const int64* sizing_array =
- (const int64*)sizing_tensor->tensor_data().data();
- for (int i = 0; i < mpi_global.size; i++) {
- record.sizes_vec.push_back(sizing_array[i]);
- output_first_dim += sizing_array[i];
- }
- }
-
- TensorShape output_shape;
- output_shape.AddDim(output_first_dim);
- for (int i = 1; i < input_tensor->shape().dims(); i++) {
- output_shape.AddDim(input_tensor->shape().dim_size(i));
- }
-
- Tensor* output_tensor;
- OP_REQUIRES_OK_ASYNC(
- context, context->allocate_output(0, output_shape, &output_tensor),
- done);
-
- record.out_t = output_tensor;
- record.dtype = input_tensor->dtype();
-
- auto allgather_done_callback = [done, context](StatusOr<Tensor> status) {
- context->SetStatus(status.status());
- done();
- };
- record.callback = allgather_done_callback;
-
- auto allgather_launch_callback = [record] {
- EnqueueTensorCollective(record, MPIRequest::ALLGATHER);
- };
-
- // If we are on a CPU, our device context will be null and we can't
- // get a stream to enqueue this on. On a CPU this op is called when the
- // data is already available, so we can just immediately do the
- // allgather; we don't have to wait for the data to get populated.
-#if GOOGLE_CUDA
- auto device_context = context->op_device_context();
- if (device_context == nullptr) {
- allgather_launch_callback();
- } else {
- auto stream = device_context->stream();
- stream->ThenDoHostCallback(allgather_launch_callback);
- }
-#else
- allgather_launch_callback();
-#endif
- }
-};
-
-REGISTER_OP("MPIAllgather")
- .Attr("T: {int32, int64, float32}")
- .Attr("S: {int64}")
- .Input("tensor: T")
- .Input("sizes: S")
- .Output("gathered: T")
- .SetShapeFn([](shape_inference::InferenceContext* c) {
- shape_inference::ShapeHandle output;
- TF_RETURN_IF_ERROR(
- c->ReplaceDim(c->input(0), 0, c->UnknownDim(), &output));
- c->set_output(0, output);
- return Status::OK();
- })
- .Doc(R"doc(
-Perform an MPI Allgather on a tensor. All other processes that do a gather on
-a tensor with the same name must use the same rank for that tensor, and must
-match it in every dimension except the first.
-
-Arguments
- tensor: A tensor to gather.
- sizes: A tensor containing the first-dimension sizes of tensors to be
- gathered from other ranks.
-
-Output
- gathered: A tensor with the same shape as `tensor` except for the first
- dimension, which is the sum of dimensions in `sizes`.
-)doc");
-
-REGISTER_KERNEL_BUILDER(
- Name("MPIAllgather").Device(DEVICE_CPU).HostMemory("sizes"),
- MPIAllgatherOp<CPUDevice>);
-#if GOOGLE_CUDA
-REGISTER_KERNEL_BUILDER(
- Name("MPIAllgather").Device(DEVICE_GPU).HostMemory("sizes"),
- MPIAllgatherOp<GPUDevice>);
-#endif
-
-} // namespace mpi
-} // namespace contrib
-} // namespace tensorflow
-
-#endif // TENSORFLOW_USE_MPI
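
The `ComputeAsync` comments above describe the pattern both kernels share: the op itself is deliberately cheap, filing a `CollectiveOpRecord` and returning, while a background thread performs the MPI work and later fires the `done` callback. Below is a minimal Python sketch of that enqueue-and-callback structure (illustrative names only; a plain queue stands in for the collective-op record table):

```python
# Sketch only: mirrors the enqueue-and-callback structure of
# MPIAllreduceOp::ComputeAsync, not the actual TensorFlow/MPI code.
import queue
import threading

records = queue.Queue()

def compute_async(tensor, done):
    """Cheap front half: package the work, hand it off, and return."""
    records.put({"tensor": tensor, "callback": done})

def background_loop():
    """Background half: drain records, run the collective, fire callbacks."""
    while True:
        record = records.get()
        if record is None:  # shutdown sentinel
            return
        result = sum(record["tensor"])  # stand-in for the MPI allreduce
        record["callback"](result)

worker = threading.Thread(target=background_loop)
worker.start()
compute_async([1, 2, 3], lambda result: print("reduced:", result))
records.put(None)
worker.join()
```

Keeping the front half this cheap is also why `IsExpensive()` returns false: the executor may run it inline instead of dispatching it to a pool thread.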
diff --git a/tensorflow/contrib/mpi_collectives/mpi_ops.py b/tensorflow/contrib/mpi_collectives/mpi_ops.py
deleted file mode 100644
index 81567cc..0000000
--- a/tensorflow/contrib/mpi_collectives/mpi_ops.py
+++ /dev/null
@@ -1,165 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# =============================================================================
-"""Inter-process communication using MPI."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import tensorflow as tf
-
-from tensorflow.python.framework import errors
-from tensorflow.python.framework import load_library
-from tensorflow.python.framework import ops
-from tensorflow.python.platform import resource_loader
-from tensorflow.python.platform import tf_logging as logging
-
-
-def _load_library(name, op_list=None):
- """Loads a .so file containing the specified operators.
-
- Args:
- name: The name of the .so file to load.
- op_list: A list of names of operators that the library should have. If None
- then the .so file's contents will not be verified.
-
- Raises:
- NameError if one of the required ops is missing.
- """
- try:
- filename = resource_loader.get_path_to_datafile(name)
- library = load_library.load_op_library(filename)
- for expected_op in (op_list or []):
- for lib_op in library.OP_LIST.op:
- if lib_op.name == expected_op:
- break
- else:
- raise NameError(
- 'Could not find operator %s in dynamic library %s' %
- (expected_op, name))
- return library
- except errors.NotFoundError:
- logging.warning('%s file could not be loaded.', name)
-
-
-MPI_LIB = _load_library('mpi_collectives.so', ['MPISize', 'MPIRank',
- 'MPILocalRank', 'MPIAllgather',
- 'MPIAllreduce'])
-
-
-def size(name=None):
- """An op which returns the number of MPI processes.
-
- This is equivalent to running `MPI_Comm_size(MPI_COMM_WORLD, ...)` to get the
- size of the global communicator.
-
- Returns:
- An integer scalar containing the number of MPI processes.
- """
- return MPI_LIB.mpi_size(name=name)
-
-
-ops.NotDifferentiable('MPISize')
-
-
-def rank(name=None):
- """An op which returns the MPI rank of the calling process.
-
- This is equivalent to running `MPI_Comm_rank(MPI_COMM_WORLD, ...)` to get the
- rank of the current process in the global communicator.
-
- Returns:
- An integer scalar with the MPI rank of the calling process.
- """
- return MPI_LIB.mpi_rank(name=name)
-
-
-ops.NotDifferentiable('MPIRank')
-
-
-def init(name=None):
- """An op which initializes MPI on the device on which it is run.
-
- All future MPI ops must be run on the same device that the `init` op was run
- on.
- """
- return MPI_LIB.mpi_init(name=name)
-
-
-ops.NotDifferentiable('MPIInit')
-
-
-def local_rank(name=None):
- """An op which returns the local MPI rank of the calling process, within the
- node that it is running on. For example, if there are seven processes running
- on a node, their local ranks will be zero through six, inclusive.
-
- This is equivalent to running `MPI_Comm_rank(...)` on a new communicator
- which only includes processes on the same node.
-
- Returns:
- An integer scalar with the local MPI rank of the calling process.
- """
- return MPI_LIB.mpi_local_rank(name=name)
-
-
-ops.NotDifferentiable('MPILocalRank')
-
-
-def _allreduce(tensor, name=None):
- """An op which sums an input tensor over all the MPI processes.
-
- The reduction operation is keyed by the name of the op. The tensor type and
- shape must be the same on all MPI processes for a given name. The reduction
- will not start until all processes are ready to send and receive the tensor.
-
- Returns:
- A tensor of the same shape and type as `tensor`, summed across all
- processes.
- """
- return MPI_LIB.mpi_allreduce(tensor, name=name)
-
-
-ops.NotDifferentiable('MPIAllreduce')
-
-
-def allgather(tensor, name=None):
- """An op which concatenates the input tensor with the same input tensor on
- all other MPI processes.
-
- The concatenation is done on the first dimension, so the input tensors on the
- different processes must have the same rank and shape, except for the first
- dimension, which is allowed to be different.
-
- Returns:
- A tensor of the same type as `tensor`, concatenated on dimension zero
- across all processes. The shape is identical to the input shape, except for
- the first dimension, which may be greater and is the sum of all first
- dimensions of the tensors in different MPI processes.
- """
- # The first allgather collects the tensor gather sizes, indicated by
- # passing in a scalar (0-D tensor) of value 0.
- sizes_flag = tf.constant(0, dtype=tf.int64, name="size_flag_const")
- my_size = tf.slice(tf.shape(tensor, out_type=tf.int64), [0], [1], name="size_slice")
- if name is None:
- name = "allgather"
- sizing_name = "{}_sizing".format(name)
- sizes = MPI_LIB.mpi_allgather(my_size, sizes_flag, name=sizing_name)
- return MPI_LIB.mpi_allgather(tensor, sizes, name=name)
-
-
-ops.NotDifferentiable('MPIAllgather')
-
-
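
For reference, the two-step sizing handshake in `allgather` above (a 0-D int64 flag first gathers the per-rank first-dimension sizes, then the real gather runs) could be exercised roughly as follows. This is a hypothetical sketch: it assumes the pre-deletion import path and an MPI launcher such as `mpirun -np 2 python script.py`, and the module no longer exists after this change.

```python
# Hypothetical usage of the removed tensorflow.contrib.mpi_collectives
# module; the import path below is the pre-deletion layout.
import tensorflow as tf
from tensorflow.contrib.mpi_collectives import mpi_ops

with tf.Session() as sess:
    sess.run(mpi_ops.init())
    my_rank = sess.run(mpi_ops.rank())
    # Ranks may contribute different first-dimension sizes; allgather first
    # exchanges those sizes, then concatenates along dimension zero.
    local = tf.fill([my_rank + 1, 3], float(my_rank))
    gathered = mpi_ops.allgather(local)
    print(sess.run(gathered).shape)  # with 2 ranks: (1 + 2, 3) == (3, 3)
```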
diff --git a/tensorflow/contrib/mpi_collectives/ring.cc b/tensorflow/contrib/mpi_collectives/ring.cc
deleted file mode 100644
index d93233e..0000000
--- a/tensorflow/contrib/mpi_collectives/ring.cc
+++ /dev/null
@@ -1,80 +0,0 @@
-/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#ifdef TENSORFLOW_USE_MPI
-
-#define EIGEN_USE_THREADS
-
-#include "tensorflow/contrib/mpi_collectives/ring.h"
-
-namespace tensorflow {
-namespace contrib {
-namespace mpi {
-
-using CPUDevice = Eigen::ThreadPoolDevice;
-
-extern template MPI_Datatype MPIType<float>();
-extern template MPI_Datatype MPIType<int>();
-extern template MPI_Datatype MPIType<long long>();
-extern template DataType TensorFlowDataType<float>();
-extern template DataType TensorFlowDataType<int>();
-extern template DataType TensorFlowDataType<long long>();
-
-// Generate all necessary specializations for RingAllreduce.
-template Status RingAllreduce<CPUDevice, int>(OpKernelContext*, const Tensor*,
- Tensor*, Tensor*);
-template Status RingAllreduce<CPUDevice, long long>(OpKernelContext*,
- const Tensor*, Tensor*,
- Tensor*);
-template Status RingAllreduce<CPUDevice, float>(OpKernelContext*, const Tensor*,
- Tensor*, Tensor*);
-
-// Generate all necessary specializations for RingAllgather.
-template Status RingAllgather<CPUDevice, int>(OpKernelContext*, const Tensor*,
- const std::vector<size_t>&,
- Tensor*);
-template Status RingAllgather<CPUDevice, long long>(OpKernelContext*,
- const Tensor*,
- const std::vector<size_t>&,
- Tensor*);
-template Status RingAllgather<CPUDevice, float>(OpKernelContext*, const Tensor*,
- const std::vector<size_t>&,
- Tensor*);
-
-// Copy data on a CPU using a straightforward memcpy.
-template <>
-void CopyTensorData<CPUDevice>(void* dst, void* src, size_t size) {
- std::memcpy(dst, src, size);
-};
-
-// Accumulate values on a CPU.
-#define GENERATE_ACCUMULATE(type) \
- template <> \
- void AccumulateTensorData<CPUDevice, type>(type * dst, type * src, \
- size_t size) { \
- for (unsigned int i = 0; i < size; i++) { \
- dst[i] += src[i]; \
- } \
- };
-GENERATE_ACCUMULATE(int);
-GENERATE_ACCUMULATE(long long);
-GENERATE_ACCUMULATE(float);
-#undef GENERATE_ACCUMULATE
-
-} // namespace mpi
-} // namespace contrib
-} // namespace tensorflow
-
-#endif // TENSORFLOW_USE_MPI
diff --git a/tensorflow/contrib/mpi_collectives/ring.cu.cc b/tensorflow/contrib/mpi_collectives/ring.cu.cc
deleted file mode 100644
index 2f3eef3..0000000
--- a/tensorflow/contrib/mpi_collectives/ring.cu.cc
+++ /dev/null
@@ -1,117 +0,0 @@
-/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#ifdef TENSORFLOW_USE_MPI
-
-#if GOOGLE_CUDA
-
-#define EIGEN_USE_GPU
-
-#include "tensorflow/contrib/mpi_collectives/ring.h"
-
-namespace tensorflow {
-namespace contrib {
-namespace mpi {
-
-using CPUDevice = Eigen::ThreadPoolDevice;
-
-template <>
-MPI_Datatype MPIType<float>() {
- return MPI_FLOAT;
-};
-template <>
-MPI_Datatype MPIType<int>() {
- return MPI_INT;
-};
-template <>
-MPI_Datatype MPIType<long long>() {
- return MPI_LONG_LONG;
-};
-
-template <>
-DataType TensorFlowDataType<float>() {
- return DT_FLOAT;
-};
-template <>
-DataType TensorFlowDataType<int>() {
- return DT_INT32;
-};
-template <>
-DataType TensorFlowDataType<long long>() {
- return DT_INT64;
-};
-
-// Generate all necessary specializations for RingAllreduce.
-template Status RingAllreduce<GPUDevice, int>(OpKernelContext*, const Tensor*,
- Tensor*, Tensor*);
-template Status RingAllreduce<GPUDevice, long long>(OpKernelContext*,
- const Tensor*, Tensor*,
- Tensor*);
-template Status RingAllreduce<GPUDevice, float>(OpKernelContext*, const Tensor*,
- Tensor*, Tensor*);
-
-// Generate all necessary specializations for RingAllgather.
-template Status RingAllgather<GPUDevice, int>(OpKernelContext*, const Tensor*,
- const std::vector<size_t>&,
- Tensor*);
-template Status RingAllgather<GPUDevice, long long>(OpKernelContext*,
- const Tensor*,
- const std::vector<size_t>&,
- Tensor*);
-template Status RingAllgather<GPUDevice, float>(OpKernelContext*, const Tensor*,
- const std::vector<size_t>&,
- Tensor*);
-
-// Synchronously copy data on the GPU, using a stream distinct from both the
-// default stream and TensorFlow's streams, to avoid synchronizing on
-// operations unrelated to the allreduce.
-template <>
-void CopyTensorData<GPUDevice>(void* dst, void* src, size_t size) {
- auto stream = CudaStreamForMPI();
- cudaMemcpyAsync(dst, src, size, cudaMemcpyDeviceToDevice, stream);
- cudaStreamSynchronize(stream);
-};
-
-// Elementwise accumulation kernel for GPU.
-template <typename T>
-__global__ void elemwise_accum(T* out, const T* in, const size_t N) {
- for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < N;
- i += blockDim.x * gridDim.x) {
- out[i] += in[i];
- }
-}
-
-// Synchronously accumulate tensors on the GPU, using a stream distinct from
-// both the default stream and TensorFlow's streams, to avoid synchronizing
-// on operations unrelated to the allreduce.
-#define GENERATE_ACCUMULATE(type) \
- template <> \
- void AccumulateTensorData<GPUDevice, type>(type * dst, type * src, \
- size_t size) { \
- auto stream = CudaStreamForMPI(); \
- elemwise_accum<type><<<32, 256, 0, stream>>>(dst, src, size); \
- cudaStreamSynchronize(stream); \
- };
-GENERATE_ACCUMULATE(int);
-GENERATE_ACCUMULATE(long long);
-GENERATE_ACCUMULATE(float);
-#undef GENERATE_ACCUMULATE
-
-} // namespace mpi
-} // namespace contrib
-} // namespace tensorflow
-#endif // GOOGLE_CUDA
-
-#endif // TENSORFLOW_USE_MPI
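
The `elemwise_accum` kernel above uses a grid-stride loop: a fixed launch of 32 blocks of 256 threads covers any element count N, with each thread striding by `blockDim.x * gridDim.x`. A small Python model of the indexing (illustrative only, not from the sources) shows that the scheme covers `0..N-1` exactly once:

```python
# Model of the grid-stride indexing used by elemwise_accum<<<32, 256>>>.
def thread_indices(block_idx, thread_idx, n, block_dim=256, grid_dim=32):
    """Elements visited by one simulated CUDA thread."""
    start = block_idx * block_dim + thread_idx
    return range(start, n, block_dim * grid_dim)

n = 100000
seen = sorted(i for b in range(32) for t in range(256)
              for i in thread_indices(b, t, n))
assert seen == list(range(n))  # every element covered exactly once
```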
diff --git a/tensorflow/contrib/mpi_collectives/ring.h b/tensorflow/contrib/mpi_collectives/ring.h
deleted file mode 100644
index cae57ce..0000000
--- a/tensorflow/contrib/mpi_collectives/ring.h
+++ /dev/null
@@ -1,327 +0,0 @@
-/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#ifndef TENSORFLOW_CONTRIB_MPI_H_
-#define TENSORFLOW_CONTRIB_MPI_H_
-
-#ifdef TENSORFLOW_USE_MPI
-
-#include "tensorflow/core/framework/op.h"
-#include "tensorflow/core/framework/op_kernel.h"
-#include "tensorflow/core/framework/shape_inference.h"
-
-#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
-#include "tensorflow/core/framework/tensor_types.h"
-
-#if GOOGLE_CUDA
-#include "cuda_runtime.h"
-#endif
-
-// Needed to avoid header issues with C++-supporting MPI implementations
-#define OMPI_SKIP_MPICXX
-#include "third_party/mpi/mpi.h"
-
-#define TAG_TENSOR 12
-
-namespace tensorflow {
-namespace contrib {
-namespace mpi {
-
-using CPUDevice = Eigen::ThreadPoolDevice;
-using GPUDevice = Eigen::GpuDevice;
-
-// Convert from templated types to values we can pass to MPI.
-template <typename T>
-MPI_Datatype MPIType();
-
-// Convert from templated types to TensorFlow data types.
-template <typename T>
-DataType TensorFlowDataType();
-
-#define MPI_REQUIRES_OK(MPI_STATUS) \
- if ((MPI_STATUS) != MPI_SUCCESS) { \
- return errors::Unknown("MPI operation failed unexpectedly."); \
- }
-
-// Copy data from one tensor to another tensor.
-// This uses a custom CUDA stream on GPU, which is necessary to overlap the
-// backpropagation computations with the allreduce.
-template <typename Device>
-void CopyTensorData(void* destination, void* source, size_t size);
-
-// Add a tensor into another tensor, accumulating in place.
-// This uses a custom CUDA stream on GPU, which is necessary to overlap the
-// backpropagation computations with the allreduce.
-template <typename Device, typename T>
-void AccumulateTensorData(T* destination, T* source, size_t size);
-
-// We need to get the right stream for doing CUDA memory transfers and
-// operations, which is possibly different from the standard TensorFlow stream.
-#if GOOGLE_CUDA
-cudaStream_t CudaStreamForMPI();
-#endif
-
-/* Perform a ring allreduce on the data. Allocate the necessary output tensor
- * and store it in the output parameter.
- *
- * Assumes that all MPI processes are doing an allreduce of the same tensor,
- * with the same dimensions.
- *
- * A ring allreduce is a bandwidth-optimal way to do an allreduce. To do the
- * allreduce, the nodes involved are arranged in a ring:
- *
- * .--0--.
- * / \
- * 3 1
- * \ /
- * *--2--*
- *
- * Each node always sends to the next clockwise node in the ring, and receives
- * from the previous one.
- *
- * The allreduce is done in two parts: a scatter-reduce and an allgather. In
- * the scatter reduce, a reduction is done, so that each node ends up with a
- * chunk of the final output tensor which has contributions from all other
- * nodes. In the allgather, those chunks are distributed among all the nodes,
- * so that all nodes have the entire output tensor.
- *
- * Both of these operations are done by dividing the input tensor into N
- * evenly sized chunks (where N is the number of nodes in the ring).
- *
- * The scatter-reduce is done in N-1 steps. In the ith step, node j will send
- * the (j - i)th chunk and receive the (j - i - 1)th chunk, adding it into
- * its existing data for that chunk. For example, in the first iteration with
- * the ring depicted above, you will have the following transfers:
- *
- * Segment 0: Node 0 --> Node 1
- * Segment 1: Node 1 --> Node 2
- * Segment 2: Node 2 --> Node 3
- * Segment 3: Node 3 --> Node 0
- *
- * In the second iteration, you'll have the following transfers:
- *
- * Segment 0: Node 1 --> Node 2
- * Segment 1: Node 2 --> Node 3
- * Segment 2: Node 3 --> Node 0
- * Segment 3: Node 0 --> Node 1
- *
- * After this iteration, Node 2 has 3 of the 4 contributions to Segment 0.
- * The last iteration has the following transfers:
- *
- * Segment 0: Node 2 --> Node 3
- * Segment 1: Node 3 --> Node 0
- * Segment 2: Node 0 --> Node 1
- * Segment 3: Node 1 --> Node 2
- *
- * After this iteration, Node 3 has the fully accumulated Segment 0; Node 0
- * has the fully accumulated Segment 1; and so on. The scatter-reduce is
- * complete.
- *
- * Next, the allgather distributes these fully accumulated chunks across all
- * nodes. Communication proceeds in the same ring, once again in N-1 steps. At
- * the ith step, node j will send chunk (j - i + 1) and receive chunk (j - i).
- * For example, at the first iteration, the following transfers will occur:
- *
- * Segment 0: Node 3 --> Node 0
- * Segment 1: Node 0 --> Node 1
- * Segment 2: Node 1 --> Node 2
- * Segment 3: Node 2 --> Node 3
- *
- * After the first iteration, Node 0 will have a fully accumulated Segment 0
- * (from Node 3) and Segment 1. In the next iteration, Node 0 will send its
- * just-received Segment 0 onward to Node 1, and receive Segment 3 from Node 3.
- * After this has continued for N - 1 iterations, all nodes will have the
- * fully accumulated tensor.
- *
- * Each node will do (N-1) sends for the scatter-reduce and (N-1) sends for the
- * allgather. Each send will contain K / N bytes, if there are K bytes in the
- * original tensor on every node. Thus, each node sends and receives 2K(N - 1)/N
- * bytes of data, and the performance of the allreduce (assuming no latency in
- * connections) is constrained by the slowest interconnect between the nodes.
- *
- */
-template <typename Device, typename T>
-Status RingAllreduce(OpKernelContext* context, const Tensor* input,
- Tensor* temp, Tensor* output) {
- // Acquire MPI size and rank
- int n, r;
- MPI_REQUIRES_OK(MPI_Comm_size(MPI_COMM_WORLD, &n));
- MPI_REQUIRES_OK(MPI_Comm_rank(MPI_COMM_WORLD, &r));
-
- T* buffer = (T*)output->tensor_data().data();
-
- CopyTensorData<Device>((void*)buffer, (void*)input->tensor_data().data(),
- output->tensor_data().size());
-
- // Calculate segment sizes and segment ends
- const size_t elements_to_reduce = input->NumElements();
- const size_t segment_size = elements_to_reduce / n;
- std::vector<size_t> segment_sizes(n, segment_size);
-
- const size_t residual = elements_to_reduce % n;
- for (size_t i = 0; i < residual; ++i) {
- segment_sizes[i]++;
- }
-
- std::vector<size_t> segment_starts(n);
- segment_starts[0] = 0;
- for (size_t i = 1; i < segment_starts.size(); ++i) {
- segment_starts[i] = segment_starts[i - 1] + segment_sizes[i - 1];
- }
-
- assert(segment_starts[n - 1] + segment_sizes[n - 1] == elements_to_reduce);
-
- T* segment_recv = (T*)temp->tensor_data().data();
-
- // Receive from your left neighbor with wrap-around
- const size_t recv_from = ((r - 1) + n) % n;
-
- // Send to your right neighbor with wrap-around
- const size_t send_to = (r + 1) % n;
-
- MPI_Status recv_status;
- MPI_Request recv_req;
-
- // Now start ring. At every step, for every rank, we iterate through
- // segments with wraparound and send and recv from our neighbors and reduce
- // locally. At the i'th iteration, rank r sends segment (r-i) and receives
- // segment (r-i-1).
- for (int i = 0; i < n - 1; i++) {
- const size_t send_seg_id = ((r - i) + n) % n;
- const size_t recv_seg_id = ((r - i - 1) + n) % n;
-
- T* segment_send = &(buffer[segment_starts[send_seg_id]]);
-
- MPI_REQUIRES_OK(MPI_Irecv(segment_recv, segment_sizes[recv_seg_id],
- MPIType<T>(), recv_from, TAG_TENSOR,
- MPI_COMM_WORLD, &recv_req));
-
- MPI_REQUIRES_OK(MPI_Send(segment_send, segment_sizes[send_seg_id],
- MPIType<T>(), send_to, TAG_TENSOR,
- MPI_COMM_WORLD));
-
- T* segment_update = &(buffer[segment_starts[recv_seg_id]]);
-
- // Wait for recv to complete before reduction
- MPI_REQUIRES_OK(MPI_Wait(&recv_req, &recv_status));
-
- const size_t recv_seg_size = segment_sizes[recv_seg_id];
- AccumulateTensorData<Device, T>(segment_update, segment_recv,
- recv_seg_size);
- }
-
- // Now start pipelined ring allgather. At every step, for every rank, we
- // iterate through segments with wraparound and send and recv from our
- // neighbors. At the i'th iteration, rank r sends segment (r-i+1) and
- // receives segment (r-i).
- for (size_t i = 0; i < n - 1; ++i) {
- const size_t send_seg_id = ((r - i + 1) + n) % n;
- const size_t recv_seg_id = ((r - i) + n) % n;
-
- // Segment to send - at every iteration we send segment (r-i+1)
- T* segment_send = &(buffer[segment_starts[send_seg_id]]);
-
- // Segment to recv - at every iteration we receive segment (r-i)
- T* segment_recv = &(buffer[segment_starts[recv_seg_id]]);
-
- MPI_REQUIRES_OK(MPI_Sendrecv(
- segment_send, segment_sizes[send_seg_id], MPIType<T>(), send_to,
- TAG_TENSOR, segment_recv, segment_sizes[recv_seg_id], MPIType<T>(),
- recv_from, TAG_TENSOR, MPI_COMM_WORLD, &recv_status));
- }
-
- return Status::OK();
-}
-
-// Perform a ring allgather on a Tensor. Other ranks may allgather with a
-// tensor which differs in the first dimension only; all other dimensions must
-// be the same.
-//
-// For more information on the ring allgather, read the documentation for the
-// ring allreduce, which includes a ring allgather.
-template <typename Device, typename T>
-Status RingAllgather(OpKernelContext* context, const Tensor* input,
- const std::vector<size_t>& sizes, Tensor* output) {
- // Acquire MPI size and rank
- int n, r;
- MPI_REQUIRES_OK(MPI_Comm_size(MPI_COMM_WORLD, &n));
- MPI_REQUIRES_OK(MPI_Comm_rank(MPI_COMM_WORLD, &r));
-
- assert(sizes.size() == n);
- assert(input->dim_size(0) == sizes[r]);
-
- // Compute the number of elements in every "row". We can't compute the
- // number of elements in each chunk, because chunks have variable length.
- size_t elements_per_row = 1;
- for (int i = 1; i < input->shape().dims(); i++) {
- elements_per_row *= input->dim_size(i);
- }
-
- // Copy data from input tensor to correct place in output tensor.
- std::vector<size_t> segment_starts(n);
- segment_starts[0] = 0;
- for (int i = 1; i < n; i++) {
- segment_starts[i] = segment_starts[i - 1] + elements_per_row * sizes[i - 1];
- }
- size_t offset = segment_starts[r];
-
- // Copy data to the right offset for this rank.
- T* buffer = (T*)output->tensor_data().data();
- CopyTensorData<Device>((void*)(buffer + offset),
- (void*)input->tensor_data().data(),
- elements_per_row * sizes[r] * sizeof(T));
-
- // Receive from your left neighbor with wrap-around
- const size_t recv_from = ((r - 1) + n) % n;
-
- // Send to your right neighbor with wrap-around
- const size_t send_to = (r + 1) % n;
-
- // Perform a ring allgather. At every step, for every rank, we iterate
- // through segments with wraparound and send and recv from our neighbors.
- // At the i'th iteration, rank r sends segment (r-i) and receives segment
- // (r-1-i).
- MPI_Status recv_status;
- for (size_t i = 0; i < n - 1; ++i) {
- const size_t send_seg_id = ((r - i) + n) % n;
- const size_t recv_seg_id = ((r - i - 1) + n) % n;
-
- // Segment to send - at every iteration we send segment (r-i)
- size_t offset_send = segment_starts[send_seg_id];
- size_t rows_send = sizes[send_seg_id];
- T* segment_send = &(buffer[offset_send]);
-
- // Segment to recv - at every iteration we receive segment (r-1-i)
- size_t offset_recv = segment_starts[recv_seg_id];
- size_t rows_recv = sizes[recv_seg_id];
- T* segment_recv = &(buffer[offset_recv]);
-
- MPI_REQUIRES_OK(MPI_Sendrecv(
- segment_send, elements_per_row * rows_send, MPIType<T>(), send_to,
- TAG_TENSOR, segment_recv, elements_per_row * rows_recv, MPIType<T>(),
- recv_from, TAG_TENSOR, MPI_COMM_WORLD, &recv_status));
- }
-
- return Status::OK();
-}
-
-} // namespace mpi
-} // namespace contrib
-} // namespace tensorflow
-
-#endif // TENSORFLOW_USE_MPI
-
-#undef TENSORFLOW_CONTRIB_MPI_H_
-#endif // TENSORFLOW_CONTRIB_MPI_H_
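
The long comment above specifies the ring schedule precisely enough to replay it. The following NumPy sketch (not from the sources) simulates the documented scatter-reduce and allgather steps with one buffer per in-process "node" and checks that every node ends up with the full sum:

```python
import numpy as np

def ring_allreduce(buffers):
    """Simulate the documented ring allreduce over one array per node."""
    n = len(buffers)
    # Split each node's copy into n chunks (larger chunks first), matching
    # the segment_sizes computation in RingAllreduce.
    chunks = [np.array_split(b.astype(float), n) for b in buffers]
    # Scatter-reduce: in step i, node j sends segment (j - i) mod n to
    # node j + 1, which accumulates it into its own copy.
    for i in range(n - 1):
        for j in range(n):
            seg = (j - i) % n
            chunks[(j + 1) % n][seg] = chunks[(j + 1) % n][seg] + chunks[j][seg]
    # Allgather: in step i, node j sends segment (j - i + 1) mod n, already
    # fully reduced, to node j + 1, which overwrites its own copy.
    for i in range(n - 1):
        for j in range(n):
            seg = (j - i + 1) % n
            chunks[(j + 1) % n][seg] = chunks[j][seg].copy()
    return [np.concatenate(c) for c in chunks]

data = [np.arange(8.0) * (r + 1) for r in range(4)]
expected = sum(data)
assert all(np.allclose(out, expected) for out in ring_allreduce(data))
```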
diff --git a/tensorflow/contrib/summary/summary_test_internal.py b/tensorflow/contrib/summary/summary_test_internal.py
deleted file mode 100644
index 80f60ae..0000000
--- a/tensorflow/contrib/summary/summary_test_internal.py
+++ /dev/null
@@ -1,59 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Internal helpers for tests in this directory."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import functools
-import os
-import sqlite3
-
-from tensorflow.contrib.summary import summary_ops
-from tensorflow.python.framework import test_util
-
-
-class SummaryDbTest(test_util.TensorFlowTestCase):
- """Helper for summary database testing."""
-
- def setUp(self):
- super(SummaryDbTest, self).setUp()
- self.db_path = os.path.join(self.get_temp_dir(), 'DbTest.sqlite')
- if os.path.exists(self.db_path):
- os.unlink(self.db_path)
- self.db = sqlite3.connect(self.db_path)
- self.create_db_writer = functools.partial(
- summary_ops.create_db_writer,
- db_uri=self.db_path,
- experiment_name='experiment',
- run_name='run',
- user_name='user')
-
- def tearDown(self):
- self.db.close()
- super(SummaryDbTest, self).tearDown()
-
-
-def get_one(db, q, *p):
- return db.execute(q, p).fetchone()[0]
-
-
-def get_all(db, q, *p):
- return unroll(db.execute(q, p).fetchall())
-
-
-def unroll(list_of_tuples):
- return sum(list_of_tuples, ())
diff --git a/tensorflow/contrib/training/python/training/sgdr_learning_rate_decay.py b/tensorflow/contrib/training/python/training/sgdr_learning_rate_decay.py
deleted file mode 100644
index ed0f398..0000000
--- a/tensorflow/contrib/training/python/training/sgdr_learning_rate_decay.py
+++ /dev/null
@@ -1,187 +0,0 @@
-# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""SGDR learning rate decay function."""
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import math
-
-from tensorflow.python.framework import constant_op
-from tensorflow.python.framework import ops
-from tensorflow.python.ops import math_ops, control_flow_ops
-
-
-def sgdr_decay(learning_rate, global_step, initial_period_steps,
- t_mul=2.0, m_mul=1.0, name=None):
- """Implements Stochastic Gradient Descent with Warm Restarts (SGDR).
-
- As described in "SGDR: Stochastic Gradient Descent
- with Warm Restarts" by Ilya Loshchilov & Frank Hutter, Proceedings of
- ICLR'2017, available at https://arxiv.org/pdf/1608.03983.pdf
-
- The learning rate decreases according to cosine annealing:
-
- ```python
- learning_rate * 0.5 * (1 + cos(x_val * pi)) # for x_val defined in [0, 1]
- ```
-
- Thus, at the beginning (when the restart index i = 0),
- the learning rate decreases for `initial_period_steps` steps from the initial
- learning rate `learning_rate` (when `x_val=0`, we get `cos(0)=1`) to
- 0 (when `x_val=1`, we get `cos(pi)=-1`).
-
- The decrease within the i-th period takes `t_i` steps,
- where `t_0` = `initial_period_steps` is the user-defined number of batch
- iterations (not epochs as in the paper) to be performed before the first
- restart is launched.
-
- Then, we perform the first restart (i=1) by setting the learning rate to
- `learning_rate*(m_mul^i)`, where `m_mul in [0,1]` (set to 1 by default).
- The i-th restart runs for `t_i=t_0*(t_mul^i)` steps, i.e., every new
- restart runs `t_mul` times longer than the previous one.
-
- Importantly, when one has no access to a validation set, SGDR suggests
- reporting the best expected / recommended solution in the following way:
- When we are within our initial run (i=0), every new solution represents
- SGDR's recommended solution. Instead, when i>0, the recommended solution is
- the one obtained at the end of each restart.
-
- Note that the minimum learning rate is set to 0 for simplicity;
- you can adjust the code to deal with any positive minimum learning rate
- as defined in the paper.
-
- `initial_period_steps` is the duration of the first period measured in terms
- of number of minibatch updates. If one wants to use epochs, one should compute
- the number of updates required for an epoch.
-
- For example, assume the following parameters and intention:
- Minibatch size: 100
- Training dataset size: 10000
- If the user wants the first decay period to span 5 epochs, then
- `initial_period_steps` = 5 * 10000/100 = 500
-
- Train for 10000 batch iterations with the initial learning rate set to
- 0.1, then restart to run 2 times longer, i.e., for 20000 batch iterations
- with the initial learning rate 0.05, then restart again and again,
- doubling the runtime of each new period and halving the initial
- learning rate each time.
-
- To accomplish the above, one would write:
-
- ```python
- ...
- global_step = tf.Variable(0, trainable=False)
- starter_learning_rate = 0.1
- learning_rate = sgdr_decay(starter_learning_rate, global_step,
- initial_period_steps=10000, t_mul=2, m_mul=0.5)
- # Passing global_step to minimize() will increment it at each step.
- learning_step = (
- tf.train.GradientDescentOptimizer(learning_rate)
- .minimize(...my loss..., global_step=global_step)
- )
-
- # Step | 0 | 1000 | 5000 | 9000 | 9999 | 10000 | 11000 |
- # LR | 0.1 | 0.097 | 0.05 | 0.002 | 0.00 | 0.05 | 0.0496 |
-
- # Step | 20000 | 29000 | 29999 | 30000 |
- # LR | 0.025 | 0.0003 | 0.00 | 0.025 |
- ```
-
- Args:
- learning_rate: A scalar `float32` or `float64` `Tensor` or a
- Python number. The initial learning rate.
- global_step: A scalar `int32` or `int64` `Tensor` or a Python number.
- Global step to use for the decay computation. Must not be negative.
- initial_period_steps: Duration of the first period measured as the number
- of minibatch updates, if one wants to use epochs, one should compute
- the number of updates required for an epoch.
- t_mul: A scalar `float32` or `float64` `Tensor` or a Python number.
- Must be positive.
- Used to derive the number of iterations in the i-th period:
- `initial_period_steps * (t_mul^i)`. Defaults to 2.0.
- m_mul: A scalar `float32` or `float64` `Tensor` or a Python number.
- Must be positive.
- Used to derive the initial learning rate of the i-th period:
- `learning_rate * (m_mul^i)`. Defaults to 1.0.
-
- Returns:
- A scalar `Tensor` of the same type as `learning_rate`.
- The learning rate for a provided global_step.
- Raises:
- ValueError: if `global_step` is not supplied.
- """
-
- if global_step is None:
- raise ValueError("global_step is required for sgdr_decay.")
- with ops.name_scope(name, "SGDRDecay",
- [learning_rate, global_step,
- initial_period_steps, t_mul, m_mul]) as name:
- learning_rate = ops.convert_to_tensor(learning_rate,
- name="initial_learning_rate")
- dtype = learning_rate.dtype
- global_step = math_ops.cast(global_step, dtype)
- t_0 = math_ops.cast(initial_period_steps, dtype)
- t_mul = math_ops.cast(t_mul, dtype)
- m_mul = math_ops.cast(m_mul, dtype)
-
- c_one = math_ops.cast(constant_op.constant(1.0), dtype)
- c_half = math_ops.cast(constant_op.constant(0.5), dtype)
- c_pi = math_ops.cast(constant_op.constant(math.pi), dtype)
-
- # Find normalized value of the current step
- x_val = math_ops.div(global_step, t_0)
-
- def compute_step(x_val, geometric=False):
- if geometric:
- # Consider geometric series where t_mul != 1
- # 1 + t_mul + t_mul^2 ... = (1 - t_mul^i_restart) / (1 - t_mul)
-
- # First find how many restarts were performed for a given x_val
- # Find maximal integer i_restart value for which this equation holds
- # x_val >= (1 - t_mul^i_restart) / (1 - t_mul)
- # x_val * (1 - t_mul) <= (1 - t_mul^i_restart)
- # t_mul^i_restart <= (1 - x_val * (1 - t_mul))
-
- # TensorFlow provides only the natural log, so
- # i_restart <= log(1 - x_val * (1 - t_mul)) / log(t_mul)
- # Find how many restarts were performed
-
- i_restart = math_ops.floor(
- math_ops.log(c_one - x_val * (c_one - t_mul)) / math_ops.log(t_mul))
- # Compute the sum of all restarts before the current one
- sum_r = (c_one - t_mul ** i_restart) / (c_one - t_mul)
- # Compute our position within the current restart
- x_val = (x_val - sum_r) / t_mul ** i_restart
-
- else:
- # Find how many restarts were performed
- i_restart = math_ops.floor(x_val)
- # Compute our position within the current restart
- x_val = x_val - i_restart
- return i_restart, x_val
-
- i_restart, x_val = control_flow_ops.cond(
- math_ops.equal(t_mul, c_one),
- lambda: compute_step(x_val, geometric=False),
- lambda: compute_step(x_val, geometric=True))
-
- # If m_mul < 1, then the initial learning rate of every new restart will be
- # smaller, i.e., by a factor of m_mul ** i_restart at the i_restart-th restart
- m_fac = learning_rate * (m_mul ** i_restart)
-
- return math_ops.multiply(c_half * m_fac,
- (math_ops.cos(x_val * c_pi) + c_one), name=name)
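
For quick sanity checks of this schedule, such as the step/LR table in the docstring above, the same closed-form computation fits in a few lines of plain Python. This is a sketch under the same `t_mul`/`m_mul` semantics; `sgdr_value` is an illustrative name, not an API in this module.

```python
import math

def sgdr_value(lr, step, t_0, t_mul=2.0, m_mul=1.0):
    """Learning rate at `step` for SGDR with a first period of t_0 steps."""
    x = step / float(t_0)
    if t_mul == 1.0:
        i_restart = math.floor(x)
        x -= i_restart
    else:
        # Invert the geometric series 1 + t_mul + ... + t_mul**(i-1)
        # to find how many restarts have already happened.
        i_restart = math.floor(
            math.log(1 - x * (1 - t_mul)) / math.log(t_mul))
        sum_r = (1 - t_mul ** i_restart) / (1 - t_mul)
        x = (x - sum_r) / t_mul ** i_restart
    return 0.5 * lr * (m_mul ** i_restart) * (1 + math.cos(x * math.pi))

# Matches the docstring example: restarts at steps 10000 and 30000.
print(sgdr_value(0.1, 0, 10000, t_mul=2, m_mul=0.5))      # 0.1
print(sgdr_value(0.1, 10000, 10000, t_mul=2, m_mul=0.5))  # 0.05
print(sgdr_value(0.1, 30000, 10000, t_mul=2, m_mul=0.5))  # 0.025
```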
diff --git a/tensorflow/contrib/training/python/training/sgdr_learning_rate_decay_test.py b/tensorflow/contrib/training/python/training/sgdr_learning_rate_decay_test.py
deleted file mode 100644
index 4a46e9a..0000000
--- a/tensorflow/contrib/training/python/training/sgdr_learning_rate_decay_test.py
+++ /dev/null
@@ -1,145 +0,0 @@
-# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Functional test for sgdr learning rate decay."""
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import math
-
-from sgdr_learning_rate_decay import sgdr_decay
-from tensorflow.python.platform import googletest
-from tensorflow.python.framework import test_util
-from tensorflow.python.framework import dtypes
-from tensorflow import placeholder
-
-
-class SGDRDecayTest(test_util.TensorFlowTestCase):
- """Unit tests for SGDR learning rate decay."""
-
- def get_original_values(self, lr, t_e, mult_factor, iter_per_epoch, epochs):
- """Get an array with learning rate values from the consecutive steps using
- the original implementation
- (https://github.com/loshchil/SGDR/blob/master/SGDR_WRNs.py)."""
- t0 = math.pi / 2.0
- tt = 0
- te_next = t_e
-
- lr_values = []
- sh_lr = lr
- for epoch in range(epochs):
- for _ in range(iter_per_epoch):
- # In the original approach training function is executed here
- lr_values.append(sh_lr)
- dt = 2.0 * math.pi / float(2.0 * t_e)
- tt = tt + float(dt) / iter_per_epoch
- if tt >= math.pi:
- tt = tt - math.pi
- cur_t = t0 + tt
- new_lr = lr * (1.0 + math.sin(cur_t)) / 2.0 # lr_min = 0, lr_max = lr
- sh_lr = new_lr
- if (epoch + 1) == te_next: # time to restart
- sh_lr = lr
- tt = 0 # by setting to 0 we set lr to lr_max, see above
- t_e = t_e * mult_factor # change the period of restarts
- te_next = te_next + t_e # note the next restart's epoch
-
- return lr_values
-
- def get_sgdr_values(self, lr, initial_period_steps, t_mul, iters):
- """Get an array with learning rate values from the consecutive steps
- using current tensorflow implementation."""
- with self.test_session():
- step = placeholder(dtypes.int32)
-
- decay = sgdr_decay(lr, step, initial_period_steps, t_mul)
- lr_values = []
- for i in range(iters):
- lr_values.append(decay.eval(feed_dict={step: i}))
-
- return lr_values
-
- def testCompareToOriginal(self):
- """Compare values generated by tensorflow implementation to the values
- generated by the original implementation
- (https://github.com/loshchil/SGDR/blob/master/SGDR_WRNs.py)."""
- with self.test_session():
- lr = 10.0
- init_steps = 2
- t_mul = 3
- iters = 10
- epochs = 50
-
- org_lr = self.get_original_values(lr, init_steps, t_mul, iters, epochs)
- sgdr_lr = self.get_sgdr_values(lr, init_steps*iters, t_mul, iters*epochs)
-
- for org, sgdr in zip(org_lr, sgdr_lr):
- self.assertAllClose(org, sgdr)
-
- def testMDecay(self):
- """Test m_mul argument. Check values for learning rate at the beginning
- of the first, second, third and fourth period. """
- with self.test_session():
- step = placeholder(dtypes.int32)
-
- lr = 0.1
- t_e = 10
- t_mul = 3
- m_mul = 0.9
-
- decay = sgdr_decay(lr, step, t_e, t_mul, m_mul)
-
- test_step = 0
- self.assertAllClose(decay.eval(feed_dict={step: test_step}),
- lr)
-
- test_step = t_e
- self.assertAllClose(decay.eval(feed_dict={step: test_step}),
- lr * m_mul)
-
- test_step = t_e + t_e*t_mul
- self.assertAllClose(decay.eval(feed_dict={step: test_step}),
- lr * m_mul**2)
-
- test_step = t_e + t_e*t_mul + t_e * (t_mul**2)
- self.assertAllClose(decay.eval(feed_dict={step: test_step}),
- lr * (m_mul**3))
-
- def testCos(self):
- """Check learning rate values at the beginning, in the middle
- and at the end of the period."""
- with self.test_session():
- step = placeholder(dtypes.int32)
- lr = 0.2
- t_e = 1000
- t_mul = 1
-
- decay = sgdr_decay(lr, step, t_e, t_mul)
-
- test_step = 0
- self.assertAllClose(decay.eval(feed_dict={step: test_step}), lr)
-
- test_step = t_e//2
- self.assertAllClose(decay.eval(feed_dict={step: test_step}), lr/2)
-
- test_step = t_e
- self.assertAllClose(decay.eval(feed_dict={step: test_step}), lr)
-
- test_step = t_e*3//2
- self.assertAllClose(decay.eval(feed_dict={step: test_step}), lr/2)
-
-if __name__ == "__main__":
- googletest.main()
diff --git a/tensorflow/core/api_def/base_api/api_def_GatherNd.pbtxt b/tensorflow/core/api_def/base_api/api_def_GatherNd.pbtxt
index c7f8b6c..6cd76ff 100644
--- a/tensorflow/core/api_def/base_api/api_def_GatherNd.pbtxt
+++ b/tensorflow/core/api_def/base_api/api_def_GatherNd.pbtxt
@@ -43,6 +43,10 @@
indices.shape[:-1] + params.shape[indices.shape[-1]:]
+Note that on CPU, if an out-of-bound index is found, an error is returned.
+On GPU, if an out-of-bound index is found, a 0 is stored in the
+corresponding output value.
+
Some examples below.
Simple indexing into a matrix:
diff --git a/tensorflow/core/api_def/base_api/api_def_GatherV2.pbtxt b/tensorflow/core/api_def/base_api/api_def_GatherV2.pbtxt
index c020176..162ef2b 100644
--- a/tensorflow/core/api_def/base_api/api_def_GatherV2.pbtxt
+++ b/tensorflow/core/api_def/base_api/api_def_GatherV2.pbtxt
@@ -50,5 +50,9 @@
<div style="width:70%; margin:auto; margin-bottom:10px; margin-top:20px;">
<img style="width:100%" src="https://www.tensorflow.org/images/Gather.png" alt>
</div>
+
+Note that on CPU, if an out-of-bound index is found, an error is returned.
+On GPU, if an out-of-bound index is found, a 0 is stored in the
+corresponding output value.
END
}
diff --git a/tensorflow/core/api_def/base_api/api_def_ScatterNd.pbtxt b/tensorflow/core/api_def/base_api/api_def_ScatterNd.pbtxt
index 2373254..4cb8c06 100644
--- a/tensorflow/core/api_def/base_api/api_def_ScatterNd.pbtxt
+++ b/tensorflow/core/api_def/base_api/api_def_ScatterNd.pbtxt
@@ -98,5 +98,8 @@
[[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]],
[[5, 5, 5, 5], [6, 6, 6, 6], [7, 7, 7, 7], [8, 8, 8, 8]],
[[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]]]
+
+Note that on CPU, if an out-of-bound index is found, an error is returned.
+On GPU, if an out-of-bound index is found, the index is ignored.
END
}
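
The CPU/GPU note added across these api_defs is easy to demonstrate. Below is a hedged TF 1.x sketch; the exact error type and message raised on CPU may vary by version and kernel.

```python
import tensorflow as tf

params = tf.constant([10.0, 20.0, 30.0])
indices = tf.constant([1, 5])  # index 5 is out of bounds

with tf.Session() as sess:
    try:
        # On GPU the result would be [20., 0.]: the bad index writes a 0.
        print(sess.run(tf.gather(params, indices)))
    except tf.errors.InvalidArgumentError as err:
        # On CPU the kernel rejects the bad index with an error instead.
        print("out-of-bound index rejected:", err.message)
```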
diff --git a/tensorflow/core/framework/numeric_types.h b/tensorflow/core/framework/numeric_types.h
index 650aa42..8514d7c 100644
--- a/tensorflow/core/framework/numeric_types.h
+++ b/tensorflow/core/framework/numeric_types.h
@@ -25,7 +25,6 @@
#include "third_party/eigen3/unsupported/Eigen/CXX11/FixedPoint"
// clang-format on
-#include "tensorflow/core/platform/cpu_info.h"
#include "tensorflow/core/platform/types.h"
namespace tensorflow {
diff --git a/tensorflow/core/graph/mkl_layout_pass.cc b/tensorflow/core/graph/mkl_layout_pass.cc
index 0ffdc42..89b23f2 100644
--- a/tensorflow/core/graph/mkl_layout_pass.cc
+++ b/tensorflow/core/graph/mkl_layout_pass.cc
@@ -2507,36 +2507,42 @@
rinfo_.push_back({csinfo_.max_pool_grad,
mkl_op_registry::GetMklOpName(csinfo_.max_pool_grad),
CopyAttrsPooling, AlwaysRewrite});
+ /*
rinfo_.push_back({csinfo_.maximum,
mkl_op_registry::GetMklOpName(csinfo_.maximum),
CopyAttrsDataType, AlwaysRewrite});
rinfo_.push_back({csinfo_.mul,
mkl_op_registry::GetMklOpName(csinfo_.mul),
CopyAttrsDataType, AlwaysRewrite});
+ */
rinfo_.push_back({csinfo_.relu,
mkl_op_registry::GetMklOpName(csinfo_.relu),
CopyAttrsDataType, AlwaysRewrite});
rinfo_.push_back({csinfo_.relu_grad,
mkl_op_registry::GetMklOpName(csinfo_.relu_grad),
CopyAttrsDataType, AlwaysRewrite});
+ /*
rinfo_.push_back({csinfo_.tanh,
mkl_op_registry::GetMklOpName(csinfo_.tanh),
CopyAttrsDataType, AlwaysRewrite});
rinfo_.push_back({csinfo_.tanh_grad,
mkl_op_registry::GetMklOpName(csinfo_.tanh_grad),
CopyAttrsDataType, AlwaysRewrite});
+ */
rinfo_.push_back({csinfo_.reshape,
mkl_op_registry::GetMklOpName(csinfo_.reshape),
CopyAttrsReshape, AlwaysRewrite});
rinfo_.push_back({csinfo_.softmax,
mkl_op_registry::GetMklOpName(csinfo_.softmax),
CopyAttrsDataType, AlwaysRewrite});
+ /*
rinfo_.push_back({csinfo_.squared_difference,
mkl_op_registry::GetMklOpName(csinfo_.squared_difference),
CopyAttrsDataType, AlwaysRewrite});
rinfo_.push_back({csinfo_.sub,
mkl_op_registry::GetMklOpName(csinfo_.sub),
CopyAttrsDataType, AlwaysRewrite});
+ */
// Add info about which ops to add workspace edge to and the slots.
wsinfo_.push_back({csinfo_.lrn, csinfo_.lrn_grad, 0, 2, 1, 3});
diff --git a/tensorflow/core/kernels/constant_op_gpu.cu.cc b/tensorflow/core/kernels/constant_op_gpu.cu.cc
deleted file mode 100644
index 49beb49..0000000
--- a/tensorflow/core/kernels/constant_op_gpu.cu.cc
+++ /dev/null
@@ -1,115 +0,0 @@
-/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#if GOOGLE_CUDA
-
-#define EIGEN_USE_GPU
-
-#include "tensorflow/core/framework/register_types.h"
-#include "tensorflow/core/framework/tensor_types.h"
-#include "tensorflow/core/kernels/fill_functor.h"
-#include "tensorflow/core/platform/types.h"
-
-namespace Eigen {
-namespace internal {
-
-template <typename T>
-struct scalar_const_op {
- typedef typename packet_traits<T>::type Packet;
-
- const T* val;
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- scalar_const_op(const scalar_const_op& x)
- : val(x.val) {}
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE scalar_const_op(const T* v) : val(v) {}
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const T operator()() const {
- return *val;
- }
-
- template <typename PacketType = Packet>
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const PacketType packetOp() const {
- return internal::pset1<PacketType>(*val);
- }
-};
-
-template <typename T>
-struct functor_traits<scalar_const_op<T> > {
- enum {
- Cost = 1,
- PacketAccess = packet_traits<T>::Vectorizable,
- IsRepeatable = true
- };
-};
-
-} // end namespace internal
-} // end namespace Eigen
-
-namespace tensorflow {
-
-namespace functor {
-
-typedef Eigen::GpuDevice GPUDevice;
-
-// Partial specialization of FillFunctor<Device=GPUDevice, T>.
-template <typename T>
-struct FillFunctor<GPUDevice, T> {
- void operator()(const GPUDevice& d, typename TTypes<T>::Flat out,
- typename TTypes<T>::ConstScalar in) {
- Eigen::internal::scalar_const_op<T> f(in.data());
- To32Bit(out).device(d) = To32Bit(out).nullaryExpr(f);
- }
-};
-
-#define DEFINE_FILL_GPU(T) template struct FillFunctor<GPUDevice, T>;
-TF_CALL_REAL_NUMBER_TYPES(DEFINE_FILL_GPU);
-TF_CALL_bfloat16(DEFINE_FILL_GPU);
-TF_CALL_bool(DEFINE_FILL_GPU);
-#undef DEFINE_FILL_GPU
-
-// Partial specialization of SetZeroFunctor<Device=GPUDevice, T>.
-template <typename T>
-struct SetZeroFunctor<GPUDevice, T> {
- void operator()(const GPUDevice& d, typename TTypes<T>::Flat out) {
- To32Bit(out).device(d) = To32Bit(out).constant(T(0));
- }
-};
-
-#define DEFINE_SETZERO_GPU(T) template struct SetZeroFunctor<GPUDevice, T>;
-TF_CALL_NUMBER_TYPES(DEFINE_SETZERO_GPU);
-TF_CALL_bfloat16(DEFINE_SETZERO_GPU);
-TF_CALL_bool(DEFINE_SETZERO_GPU);
-#undef DEFINE_SETZERO_GPU
-
-// Partial specialization of SetOneFunctor<Device=GPUDevice, T>.
-template <typename T>
-struct SetOneFunctor<GPUDevice, T> {
- void operator()(const GPUDevice& d, typename TTypes<T>::Flat out) {
- To32Bit(out).device(d) = To32Bit(out).constant(T(1));
- }
-};
-
-#define DEFINE_SETONE_GPU(T) template struct SetOneFunctor<GPUDevice, T>;
-TF_CALL_NUMBER_TYPES(DEFINE_SETONE_GPU);
-TF_CALL_bfloat16(DEFINE_SETONE_GPU);
-TF_CALL_bool(DEFINE_SETONE_GPU);
-#undef DEFINE_SETONE_GPU
-
-} // end namespace functor
-} // end namespace tensorflow
-
-#endif // GOOGLE_CUDA
diff --git a/tensorflow/core/kernels/conv_ops_gpu_3.cu.cc b/tensorflow/core/kernels/conv_ops_gpu_3.cu.cc
index 172deea..2a46494 100644
--- a/tensorflow/core/kernels/conv_ops_gpu_3.cu.cc
+++ b/tensorflow/core/kernels/conv_ops_gpu_3.cu.cc
@@ -541,6 +541,7 @@
int TileShortSide,
int size_of_t, Op op) {
// clang-format off
+
return (size_of_t == 16 && ((TileLongSide == 32 && op(TileShortSide, 4)) ||
(TileLongSide == 64 && op(TileShortSide, 4)) ||
(TileLongSide == 128 && op(TileShortSide, 4)) ||
@@ -568,6 +569,7 @@
(TileLongSide == 256 && op(TileShortSide, 8)) ||
(TileLongSide == 512 && op(TileShortSide, 4)) ||
(TileLongSide == 1024 && op(TileShortSide, 2))));
+
// clang-format on
}
diff --git a/tensorflow/core/kernels/mkl_concat_op.cc b/tensorflow/core/kernels/mkl_concat_op.cc
index d0175df..8277179 100644
--- a/tensorflow/core/kernels/mkl_concat_op.cc
+++ b/tensorflow/core/kernels/mkl_concat_op.cc
@@ -650,10 +650,6 @@
// format and avoid calling eigen version.
if (!are_all_tf_inputs && !are_all_mkl_inputs) invoke_eigen = true;
- // Temporary fallback to Eigen until MKLDNN Concat performance
- // is improved. To be removed.
- invoke_eigen = true;
-
// Call Eigen library
if (invoke_eigen) {
TensorShapeList tf_input_shapes;
@@ -694,7 +690,7 @@
// It does not matter what data format we use here (NHWC or NCHW).
// We just need to ensure that output of Concat uses same data format
// as input.
- memory::desc(src_dims, MklDnnType<T>(), memory::format::nhwc);
+ memory::desc(src_dims, MklDnnType<T>(), memory::format::nchw);
srcs[k].SetUsrMem(src_md, &input_tensors[k]);
auto src_mpd = srcs[k].GetUsrMemPrimDesc();
@@ -720,7 +716,7 @@
} else {
// Again, format does not matter here. We just need to make it same as
// input format.
- dst_md = memory::desc(dst_dims, MklDnnType<T>(), memory::format::nhwc);
+ dst_md = memory::desc(dst_dims, MklDnnType<T>(), memory::format::nchw);
}
std::vector<primitive::at> inputs;
diff --git a/tensorflow/core/kernels/mkl_tfconv_op.cc b/tensorflow/core/kernels/mkl_tfconv_op.cc
deleted file mode 100644
index b48c735..0000000
--- a/tensorflow/core/kernels/mkl_tfconv_op.cc
+++ /dev/null
@@ -1,124 +0,0 @@
-/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#ifdef INTEL_MKL
-
-#include <algorithm>
-#include <vector>
-#include "tensorflow/core/framework/numeric_op.h"
-#include "tensorflow/core/framework/op.h"
-#include "tensorflow/core/framework/op_kernel.h"
-#include "tensorflow/core/framework/register_types.h"
-#include "tensorflow/core/framework/tensor.h"
-#include "tensorflow/core/framework/tensor_shape.h"
-#include "tensorflow/core/kernels/ops_util.h"
-#include "tensorflow/core/platform/cpu_info.h"
-#include "tensorflow/core/platform/macros.h"
-#include "tensorflow/core/util/tensor_format.h"
-
-#include "tensorflow/core/util/mkl_util.h"
-#include "mkl_dnn.h"
-#include "mkl_dnn_types.h"
-
-namespace tensorflow {
-typedef Eigen::ThreadPoolDevice CPUDevice;
-
-///////////////////////////////////////////////////////////
-// Op kernel
-///////////////////////////////////////////////////////////
-
-template <typename Device, typename T>
-class MklToTfOp : public OpKernel {
- public:
- explicit MklToTfOp(OpKernelConstruction* context) : OpKernel(context) {
- OP_REQUIRES_OK(context, context->GetAttr("data_format", &data_format_str));
- OP_REQUIRES_OK(context, context->GetAttr("T", &op_data_type));
- has_avx512f_ = port::TestCPUFeature(port::CPUFeature::AVX512F);
- }
-
- void Compute(OpKernelContext* context) override {
- // Check that input tensor is in MKL format.
- const Tensor& input_tensor = MklGetInput(context, 0);
- MklShape input_shape;
- GetMklShape(context, 0, &input_shape);
-
- // if input is already in Tf format, then just copy input tensor to output.
- if (!input_shape.IsMklTensor()) {
- context->set_output(0, input_tensor);
- VLOG(1) << "MKLToTFConversion: No conversion needed, "
- << "copying input to output";
- return;
- }
-
- // Check that input data type is same as operator data type and that it is
- // same as output data type.
- DataType input_data_type = input_type(0);
- DataType output_data_type = output_type(0);
- CHECK_EQ(op_data_type, input_data_type);
- CHECK_EQ(op_data_type, output_data_type);
-
- TensorShape output_shape;
- size_t ndims = input_shape.GetDimension();
- size_t* in_sizes = new size_t[ndims];
- for (size_t i = 0; i < ndims; i++) {
- // Outermost to innermost dimension
- output_shape.AddDim(input_shape.GetSizes()[input_shape.tf_dim_idx(i)]);
- in_sizes[i] = input_shape.GetSizes()[i];
- }
-
- // Allocate output tensor.
- Tensor* output_tensor = NULL;
- OP_REQUIRES_OK(context,
- context->allocate_output(0, output_shape, &output_tensor));
-
- dnnLayout_t output_layout =
- static_cast<dnnLayout_t>(input_shape.GetTfLayout());
- // Execute DNNConversion.
- void* input_buffer =
- static_cast<void*>(const_cast<T*>(input_tensor.flat<T>().data()));
- delete[] in_sizes;
- void* output_buffer =
- static_cast<void*>(const_cast<T*>(output_tensor->flat<T>().data()));
- input_shape.GetConvertedFlatData(output_layout, input_buffer,
- output_buffer);
- VLOG(1) << "MKLToTFConversion complete successfully.";
- }
-
- private:
- /// Data format of the operation
- string data_format_str;
-
- /// Data type of the operation
- DataType op_data_type;
-
- /// CPUIDInfo
- bool has_avx512f_ = false;
-};
-
-///////////////////////////////////////////////////////////
-// Register kernel
-///////////////////////////////////////////////////////////
-
-#define REGISTER_CPU(T) \
- REGISTER_KERNEL_BUILDER(Name("_MklToTf") \
- .Device(DEVICE_CPU) \
- .TypeConstraint<T>("T") \
- .Label(mkl_op_registry::kMklOpLabel), \
- MklToTfOp<CPUDevice, T>);
-
-TF_CALL_float(REGISTER_CPU);
-#undef REGISTER_CPU
-} // namespace tensorflow
-#endif /* INTEL_MKL */
diff --git a/tensorflow/core/kernels/record_input_op.cc b/tensorflow/core/kernels/record_input_op.cc
index 0c05349..841f9dc 100644
--- a/tensorflow/core/kernels/record_input_op.cc
+++ b/tensorflow/core/kernels/record_input_op.cc
@@ -38,6 +38,7 @@
GETATTR(int64, batch_size);
GETATTR(string, compression_type);
#undef GETATTR
+
OP_REQUIRES_OK(ctx, ctx->GetAttr("compression_type", &compression_type));
RecordYielder::Options yopts;
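For context, the attribute read above feeds the `RecordInput` op. A minimal sketch of exercising it from Python, assuming the 1.5 Python wrapper exposes the new `compression_type` argument (the file pattern here is hypothetical):

```python
import tensorflow as tf
from tensorflow.python.ops import data_flow_ops

# Hypothetical file pattern; compression_type is the attribute read above.
records = data_flow_ops.RecordInput(
    file_pattern="/tmp/train/part-*",
    batch_size=4,
    compression_type="GZIP").get_yield_op()

with tf.Session() as sess:
    batch = sess.run(records)  # a batch of raw (decompressed) record strings
```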
diff --git a/tensorflow/core/ops/array_ops.cc b/tensorflow/core/ops/array_ops.cc
index c7d9b97..9211a13 100644
--- a/tensorflow/core/ops/array_ops.cc
+++ b/tensorflow/core/ops/array_ops.cc
@@ -1560,6 +1560,10 @@
<img style="width:100%" src="https://www.tensorflow.org/images/Gather.png" alt>
</div>
+Note that on CPU, if an out-of-bound index is found, an error is returned.
+On GPU, if an out-of-bound index is found, a 0 is stored in the
+corresponding output value.
+
params: The tensor from which to gather values. Must be at least rank
`axis + 1`.
indices: Index tensor. Must be in range `[0, params.shape[axis])`.
@@ -1629,6 +1633,10 @@
indices.shape[:-1] + params.shape[indices.shape[-1]:]
+Note that on CPU, if an out-of-bound index is found, an error is returned.
+On GPU, if an out-of-bound index is found, a 0 is stored in the
+corresponding output value.
+
Some examples below.
Simple indexing into a matrix:
@@ -5413,6 +5421,9 @@
[[5, 5, 5, 5], [6, 6, 6, 6], [7, 7, 7, 7], [8, 8, 8, 8]],
[[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]]]
+Note that on CPU, if an out-of-bound index is found, an error is returned.
+On GPU, if an out-of-bound index is found, the index is ignored.
+
indices: Index tensor.
updates: Updates to scatter into output.
shape: 1-D. The shape of the resulting tensor.
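The CPU-side behavior documented above can be seen with a short sketch; on GPU the same fetch would write a 0 (or skip the update, for scatter) instead of raising:

```python
import tensorflow as tf

params = tf.constant([10.0, 20.0, 30.0])
indices = tf.constant([0, 2, 7])  # index 7 is out of bounds for params

with tf.device("/cpu:0"):
    gathered = tf.gather(params, indices)

with tf.Session() as sess:
    try:
        sess.run(gathered)
    except tf.errors.InvalidArgumentError as err:
        # The CPU kernel rejects the out-of-bound index with an error.
        print(err.message)
```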
diff --git a/tensorflow/core/platform/default/build_config.bzl b/tensorflow/core/platform/default/build_config.bzl
index 942bca6..6d83f8b 100644
--- a/tensorflow/core/platform/default/build_config.bzl
+++ b/tensorflow/core/platform/default/build_config.bzl
@@ -67,16 +67,14 @@
pxd_srcs.append(src)
# Invoke cython to produce the shared object libraries.
- cpp_outs = [src.split(".")[0] + ".cpp" for src in pyx_srcs]
- native.genrule(
- name = name + "_cython_translation",
- srcs = pyx_srcs,
- outs = cpp_outs,
- cmd = ("PYTHONHASHSEED=0 $(location @cython//:cython_binary) --cplus $(SRCS)"
- # Rename outputs to expected location.
- + """ && python -c 'import shutil, sys; n = len(sys.argv); [shutil.copyfile(src.split(".")[0] + ".cpp", dst) for src, dst in zip(sys.argv[1:], sys.argv[1+n//2:])]' $(SRCS) $(OUTS)"""),
- tools = ["@cython//:cython_binary"] + pxd_srcs,
- )
+ for filename in pyx_srcs:
+ native.genrule(
+ name = filename + "_cython_translation",
+ srcs = [filename],
+ outs = [filename.split(".")[0] + ".cpp"],
+ cmd = "PYTHONHASHSEED=0 $(location @cython//:cython_binary) --cplus $(SRCS) --output-file $(OUTS)",
+ tools = ["@cython//:cython_binary"] + pxd_srcs,
+ )
shared_objects = []
for src in pyx_srcs:
diff --git a/tensorflow/core/platform/vmodule_benchmark_test.cc b/tensorflow/core/platform/vmodule_benchmark_test.cc
deleted file mode 100644
index 0f9e75b..0000000
--- a/tensorflow/core/platform/vmodule_benchmark_test.cc
+++ /dev/null
@@ -1,28 +0,0 @@
-/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#include "tensorflow/core/platform/logging.h"
-#include "tensorflow/core/platform/test_benchmark.h"
-
-namespace tensorflow {
-
-static void BM_DisabledVlog(int iters) {
- for (int i = 0; i < iters; ++i) {
- VLOG(1) << "Testing VLOG(1)!";
- }
-}
-BENCHMARK(BM_DisabledVlog);
-
-} // namespace tensorflow
diff --git a/tensorflow/core/platform/vmodule_test.cc b/tensorflow/core/platform/vmodule_test.cc
deleted file mode 100644
index 47b4b2e..0000000
--- a/tensorflow/core/platform/vmodule_test.cc
+++ /dev/null
@@ -1,117 +0,0 @@
-/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-// Test that popens a child process with the VLOG-ing environment variable set
-// for the logging framework, and observes VLOG_IS_ON and VLOG macro output.
-
-#include "tensorflow/core/platform/logging.h"
-#include "tensorflow/core/platform/platform.h"
-#include "tensorflow/core/platform/test.h"
-
-#include <string.h>
-
-namespace tensorflow {
-namespace {
-
-int RealMain(const char* argv0, bool do_vlog) {
- if (do_vlog) {
-#if !defined(PLATFORM_GOOGLE)
- // Note, we only test this when !defined(PLATFORM_GOOGLE) because
- // VmoduleActivated doesn't exist in that implementation.
- //
- // Also, we call this internal API to simulate what would happen if
- // differently-named translation units attempted to VLOG, so we don't need
- // to create dummy translation unit files.
- bool ok = internal::LogMessage::VmoduleActivated("vmodule_test.cc", 7) &&
- internal::LogMessage::VmoduleActivated("shoobadooba.h", 3);
- if (!ok) {
- fprintf(stderr, "vmodule activated levels not as expected.\n");
- return EXIT_FAILURE;
- }
-#endif
-
- // Print info on which VLOG levels are activated.
- fprintf(stderr, "VLOG_IS_ON(8)? %d\n", VLOG_IS_ON(8));
- fprintf(stderr, "VLOG_IS_ON(7)? %d\n", VLOG_IS_ON(7));
- fprintf(stderr, "VLOG_IS_ON(6)? %d\n", VLOG_IS_ON(6));
- // Do some VLOG-ing.
- VLOG(8) << "VLOG(8)";
- VLOG(7) << "VLOG(7)";
- VLOG(6) << "VLOG(6)";
- LOG(INFO) << "INFO";
- return EXIT_SUCCESS;
- }
-
- // Popen the child process.
- std::string command = std::string(argv0);
-#if defined(PLATFORM_GOOGLE)
- command = command + " do_vlog --vmodule=vmodule_test=7 --alsologtostderr";
-#else
- command =
- "TF_CPP_VMODULE=vmodule_test=7,shoobadooba=3 " + command + " do_vlog";
-#endif
- command += " 2>&1";
- fprintf(stderr, "Running: \"%s\"\n", command.c_str());
- FILE* f = popen(command.c_str(), "r");
- if (f == nullptr) {
- fprintf(stderr, "Failed to popen child: %s\n", strerror(errno));
- return EXIT_FAILURE;
- }
-
- // Read data from the child's stdout.
- constexpr int kBufferSizeBytes = 4096;
- char buffer[kBufferSizeBytes];
- size_t result = fread(buffer, sizeof(buffer[0]), kBufferSizeBytes - 1, f);
- if (result == 0) {
- fprintf(stderr, "Failed to read from child stdout: %zu %s\n", result,
- strerror(errno));
- return EXIT_FAILURE;
- }
- buffer[result] = '\0';
- int status = pclose(f);
- if (status == -1) {
- fprintf(stderr, "Failed to close popen child: %s\n", strerror(errno));
- return EXIT_FAILURE;
- }
-
- // Check output is as expected.
- const char kExpected[] =
- "VLOG_IS_ON(8)? 0\nVLOG_IS_ON(7)? 1\nVLOG_IS_ON(6)? 1\n";
- if (strstr(buffer, kExpected) == nullptr) {
- fprintf(stderr, "error: unexpected output from child: \"%.*s\"\n",
- kBufferSizeBytes, buffer);
- return EXIT_FAILURE;
- }
- bool ok = strstr(buffer, "VLOG(7)\n") != nullptr &&
- strstr(buffer, "VLOG(6)\n") != nullptr &&
- strstr(buffer, "VLOG(8)\n") == nullptr;
- if (!ok) {
- fprintf(stderr, "error: VLOG output not as expected: \"%.*s\"\n",
- kBufferSizeBytes, buffer);
- return EXIT_FAILURE;
- }
-
- // Success!
- return EXIT_SUCCESS;
-}
-
-} // namespace
-} // namespace tensorflow
-
-int main(int argc, char** argv) {
- testing::InitGoogleTest(&argc, argv);
- bool do_vlog = argc >= 2 && strcmp(argv[1], "do_vlog") == 0;
- return tensorflow::RealMain(argv[0], do_vlog);
-}
diff --git a/tensorflow/core/profiler/g3doc/advise.md b/tensorflow/core/profiler/g3doc/advise.md
index d0de831..379c3f1 100644
--- a/tensorflow/core/profiler/g3doc/advise.md
+++ b/tensorflow/core/profiler/g3doc/advise.md
@@ -1,6 +1,6 @@
## Auto Detect and Advise
-tfprof analyzes profiles and generates advises for common issues.
+tfprof analyzes profiles and generates advice for common issues.
### Run Advise.
diff --git a/tensorflow/core/public/version.h b/tensorflow/core/public/version.h
index c037a9b..3baab75 100644
--- a/tensorflow/core/public/version.h
+++ b/tensorflow/core/public/version.h
@@ -19,12 +19,12 @@
// TensorFlow uses semantic versioning, see http://semver.org/.
#define TF_MAJOR_VERSION 1
-#define TF_MINOR_VERSION 4
+#define TF_MINOR_VERSION 5
#define TF_PATCH_VERSION 0
// TF_VERSION_SUFFIX is non-empty for pre-releases (e.g. "-alpha", "-alpha.1",
// "-beta", "-rc", "-rc.1")
-#define TF_VERSION_SUFFIX ""
+#define TF_VERSION_SUFFIX "-rc0"
#define TF_STR_HELPER(x) #x
#define TF_STR(x) TF_STR_HELPER(x)
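The suffix above is what surfaces in the Python-level version string, so a quick way to confirm which build is installed:

```python
import tensorflow as tf

# The compiled-in suffix shows up in the reported version for this RC.
print(tf.VERSION)  # expected: 1.5.0-rc0
```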
diff --git a/tensorflow/docs_src/api_guides/cc/guide.md b/tensorflow/docs_src/api_guides/cc/guide.md
index 81fb1e1..4e51ada 100644
--- a/tensorflow/docs_src/api_guides/cc/guide.md
+++ b/tensorflow/docs_src/api_guides/cc/guide.md
@@ -1,6 +1,6 @@
# C++ API
-Note: By default [tensorflow.org](http://tensorflow.org) shows docs for the
+Note: By default [tensorflow.org](https://www.tensorflow.org) shows docs for the
most recent stable version. The instructions in this doc require building from
source. You will probably want to build from the `master` version of tensorflow.
You should, as a result, be sure you are following the
diff --git a/tensorflow/docs_src/extend/adding_an_op.md b/tensorflow/docs_src/extend/adding_an_op.md
index c52279b..15075e1 100644
--- a/tensorflow/docs_src/extend/adding_an_op.md
+++ b/tensorflow/docs_src/extend/adding_an_op.md
@@ -1,6 +1,6 @@
# Adding a New Op
-Note: By default [tensorflow.org](http://tensorflow.org) shows docs for the
+Note: By default [tensorflow.org](https://www.tensorflow.org) shows docs for the
most recent stable version. The instructions in this doc require building from
source. You will probably want to build from the `master` version of tensorflow.
You should, as a result, be sure you are following the
diff --git a/tensorflow/docs_src/install/index.md b/tensorflow/docs_src/install/index.md
index c4fc882..3c84886 100644
--- a/tensorflow/docs_src/install/index.md
+++ b/tensorflow/docs_src/install/index.md
@@ -4,7 +4,7 @@
operating systems:
* MacOS X 10.11 (El Capitan) or later.
- * Ubuntu 14.04 or later
+ * Ubuntu 16.04 or later
* Windows 7 or later.
Although you might be able to install TensorFlow on other laptop or desktop
diff --git a/tensorflow/docs_src/install/install_c.md b/tensorflow/docs_src/install/install_c.md
index df622c6..d79cd14 100644
--- a/tensorflow/docs_src/install/install_c.md
+++ b/tensorflow/docs_src/install/install_c.md
@@ -38,7 +38,7 @@
OS="linux" # Change to "darwin" for macOS
TARGET_DIRECTORY="/usr/local"
curl -L \
- "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-${TF_TYPE}-${OS}-x86_64-1.4.0.tar.gz" |
+ "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-${TF_TYPE}-${OS}-x86_64-1.5.0-rc0.tar.gz" |
sudo tar -C $TARGET_DIRECTORY -xz
The `tar` command extracts the TensorFlow C library into the `lib`
diff --git a/tensorflow/docs_src/install/install_go.md b/tensorflow/docs_src/install/install_go.md
index 8b3da49..49f5350 100644
--- a/tensorflow/docs_src/install/install_go.md
+++ b/tensorflow/docs_src/install/install_go.md
@@ -38,7 +38,7 @@
TF_TYPE="cpu" # Change to "gpu" for GPU support
TARGET_DIRECTORY='/usr/local'
curl -L \
- "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-${TF_TYPE}-$(go env GOOS)-x86_64-1.4.0.tar.gz" |
+ "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-${TF_TYPE}-$(go env GOOS)-x86_64-1.5.0-rc0.tar.gz" |
sudo tar -C $TARGET_DIRECTORY -xz
The `tar` command extracts the TensorFlow C library into the `lib`
diff --git a/tensorflow/docs_src/install/install_java.md b/tensorflow/docs_src/install/install_java.md
index d189fa4..47b1251 100644
--- a/tensorflow/docs_src/install/install_java.md
+++ b/tensorflow/docs_src/install/install_java.md
@@ -17,7 +17,7 @@
(and we only support) these instructions on machines meeting the
following requirements:
- * Ubuntu 14.04 or higher; 64-bit, x86
+ * Ubuntu 16.04 or higher; 64-bit, x86
* macOS X 10.11 (El Capitan) or higher
* Windows 7 or higher; 64-bit, x86
@@ -36,7 +36,7 @@
<dependency>
<groupId>org.tensorflow</groupId>
<artifactId>tensorflow</artifactId>
- <version>1.4.0</version>
+ <version>1.5.0-rc0</version>
</dependency>
```
@@ -65,7 +65,7 @@
<dependency>
<groupId>org.tensorflow</groupId>
<artifactId>tensorflow</artifactId>
- <version>1.4.0</version>
+ <version>1.5.0-rc0</version>
</dependency>
</dependencies>
</project>
@@ -147,7 +147,7 @@
Take the following steps to install TensorFlow for Java on Linux or macOS:
1. Download
- [libtensorflow.jar](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-1.4.0.jar),
+ [libtensorflow.jar](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-1.5.0-rc0.jar),
which is the TensorFlow Java Archive (JAR).
2. Decide whether you will run TensorFlow for Java on CPU(s) only or with
@@ -166,7 +166,7 @@
OS=$(uname -s | tr '[:upper:]' '[:lower:]')
mkdir -p ./jni
curl -L \
- "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow_jni-${TF_TYPE}-${OS}-x86_64-1.4.0.tar.gz" |
+ "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow_jni-${TF_TYPE}-${OS}-x86_64-1.5.0-rc0.tar.gz" |
tar -xz -C ./jni
### Install on Windows
@@ -174,10 +174,10 @@
Take the following steps to install TensorFlow for Java on Windows:
1. Download
- [libtensorflow.jar](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-1.4.0.jar),
+ [libtensorflow.jar](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-1.5.0-rc0.jar),
which is the TensorFlow Java Archive (JAR).
2. Download the following Java Native Interface (JNI) file appropriate for
- [TensorFlow for Java on Windows](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow_jni-cpu-windows-x86_64-1.4.0.zip).
+ [TensorFlow for Java on Windows](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow_jni-cpu-windows-x86_64-1.5.0-rc0.zip).
3. Extract this .zip file.
@@ -225,7 +225,7 @@
downloaded `.jar` in your `classpath` by using the `-cp` compilation flag
as follows:
-<pre><b>javac -cp libtensorflow-1.4.0.jar HelloTF.java</b></pre>
+<pre><b>javac -cp libtensorflow-1.5.0-rc0.jar HelloTF.java</b></pre>
### Running
@@ -239,11 +239,11 @@
For example, the following command line executes the `HelloTF` program on Linux
and macOS X:
-<pre><b>java -cp libtensorflow-1.4.0.jar:. -Djava.library.path=./jni HelloTF</b></pre>
+<pre><b>java -cp libtensorflow-1.5.0-rc0.jar:. -Djava.library.path=./jni HelloTF</b></pre>
And the following command line executes the `HelloTF` program on Windows:
-<pre><b>java -cp libtensorflow-1.4.0.jar;. -Djava.library.path=jni HelloTF</b></pre>
+<pre><b>java -cp libtensorflow-1.5.0-rc0.jar;. -Djava.library.path=jni HelloTF</b></pre>
If the program prints <tt>Hello from <i>version</i></tt>, you've successfully
installed TensorFlow for Java and are ready to use the API. If the program
diff --git a/tensorflow/docs_src/install/install_linux.md b/tensorflow/docs_src/install/install_linux.md
index e3d5b80..275ff8c 100644
--- a/tensorflow/docs_src/install/install_linux.md
+++ b/tensorflow/docs_src/install/install_linux.md
@@ -6,7 +6,7 @@
following requirements:
* 64-bit desktops or laptops
- * Ubuntu 14.04 or higher
+ * Ubuntu 16.04 or higher
## Determine which TensorFlow to install
@@ -188,7 +188,7 @@
Virtualenv environment:
<pre>(tensorflow)$ <b>pip3 install --upgrade \
- https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.4.0-cp34-cp34m-linux_x86_64.whl</b></pre>
+ https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.5.0rc0-cp34-cp34m-linux_x86_64.whl</b></pre>
If you encounter installation problems, see
[Common Installation Problems](#common_installation_problems).
@@ -293,7 +293,7 @@
<pre>
$ <b>sudo pip3 install --upgrade \
- https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.4.0-cp34-cp34m-linux_x86_64.whl</b>
+ https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.5.0rc0-cp34-cp34m-linux_x86_64.whl</b>
</pre>
If this step fails, see
@@ -480,7 +480,7 @@
<pre>
(tensorflow)$ <b>pip install --ignore-installed --upgrade \
- https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.4.0-cp34-cp34m-linux_x86_64.whl</b></pre>
+ https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.5.0rc0-cp34-cp34m-linux_x86_64.whl</b></pre>
<a name="ValidateYourInstallation"></a>
@@ -648,14 +648,14 @@
CPU only:
<pre>
-https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.4.0-cp27-none-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.5.0rc0-cp27-none-linux_x86_64.whl
</pre>
GPU support:
<pre>
-https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.4.0-cp27-none-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.5.0rc0-cp27-none-linux_x86_64.whl
</pre>
Note that GPU support requires the NVIDIA hardware and software described in
@@ -667,14 +667,14 @@
CPU only:
<pre>
-https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.4.0-cp34-cp34m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.5.0rc0-cp34-cp34m-linux_x86_64.whl
</pre>
GPU support:
<pre>
-https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.4.0-cp34-cp34m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.5.0rc0-cp34-cp34m-linux_x86_64.whl
</pre>
Note that GPU support requires the NVIDIA hardware and software described in
@@ -686,14 +686,14 @@
CPU only:
<pre>
-https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.4.0-cp35-cp35m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.5.0rc0-cp35-cp35m-linux_x86_64.whl
</pre>
GPU support:
<pre>
-https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.4.0-cp35-cp35m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.5.0rc0-cp35-cp35m-linux_x86_64.whl
</pre>
@@ -705,14 +705,14 @@
CPU only:
<pre>
-https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.4.0-cp36-cp36m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.5.0rc0-cp36-cp36m-linux_x86_64.whl
</pre>
GPU support:
<pre>
-https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.4.0-cp36-cp36m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.5.0rc0-cp36-cp36m-linux_x86_64.whl
</pre>
diff --git a/tensorflow/docs_src/install/install_mac.md b/tensorflow/docs_src/install/install_mac.md
index d4ab547..926ceae 100644
--- a/tensorflow/docs_src/install/install_mac.md
+++ b/tensorflow/docs_src/install/install_mac.md
@@ -115,7 +115,7 @@
TensorFlow in the active Virtualenv is as follows:
<pre> $ <b>pip3 install --upgrade \
- https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.4.0-py2-none-any.whl</b></pre>
+ https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.5.0rc0-py2-none-any.whl</b></pre>
If you encounter installation problems, see
[Common Installation Problems](#common-installation-problems).
@@ -238,7 +238,7 @@
issue the following command:
<pre> $ <b>sudo pip3 install --upgrade \
- https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.4.0-py2-none-any.whl</b> </pre>
+ https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.5.0rc0-py2-none-any.whl</b> </pre>
If the preceding command fails, see
[installation problems](#common-installation-problems).
@@ -347,7 +347,7 @@
TensorFlow for Python 2.7:
<pre> (<i>targetDirectory</i>)$ <b>pip install --ignore-installed --upgrade \
- https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.4.0-py2-none-any.whl</b></pre>
+ https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.5.0rc0-py2-none-any.whl</b></pre>
<a name="ValidateYourInstallation"></a>
@@ -520,7 +520,7 @@
<pre>
-https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.4.0-py2-none-any.whl
+https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.5.0rc0-py2-none-any.whl
</pre>
@@ -528,5 +528,5 @@
<pre>
-https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.4.0-py3-none-any.whl
+https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.5.0rc0-py3-none-any.whl
</pre>
diff --git a/tensorflow/docs_src/install/install_sources.md b/tensorflow/docs_src/install/install_sources.md
index e453bd6..90e93f5 100644
--- a/tensorflow/docs_src/install/install_sources.md
+++ b/tensorflow/docs_src/install/install_sources.md
@@ -25,8 +25,10 @@
following:
* [Bazel on Windows](https://bazel.build/versions/master/docs/windows.html)
-* [TensorFlow CMake build](https://github.com/tensorflow/tensorflow/tree/r0.12/tensorflow/contrib/cmake)
+* [TensorFlow CMake build](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/cmake)
+Note: Starting from the 1.6 release, our prebuilt binaries will use AVX
+instructions. Older CPUs may not be able to execute these binaries.
## Determine which TensorFlow to install
@@ -359,10 +361,10 @@
The filename of the `.whl` file depends on your platform.
For example, the following command will install the pip package
-for TensorFlow 1.4.0 on Linux:
+for TensorFlow 1.5.0rc0 on Linux:
<pre>
-$ <b>sudo pip install /tmp/tensorflow_pkg/tensorflow-1.4.0-py2-none-any.whl</b>
+$ <b>sudo pip install /tmp/tensorflow_pkg/tensorflow-1.5.0rc0-py2-none-any.whl</b>
</pre>
## Validate your installation
diff --git a/tensorflow/docs_src/tutorials/deep_cnn.md b/tensorflow/docs_src/tutorials/deep_cnn.md
index 3692a02..67975402 100644
--- a/tensorflow/docs_src/tutorials/deep_cnn.md
+++ b/tensorflow/docs_src/tutorials/deep_cnn.md
@@ -83,21 +83,21 @@
## Code Organization
The code for this tutorial resides in
-[`models/tutorials/image/cifar10/`](https://github.com/tensorflow/models/tree/master/tutorials/image/cifar10/).
+[`models/tutorials/image/cifar10/`](https://www.tensorflow.org/code/tensorflow_models/tutorials/image/cifar10/).
File | Purpose
--- | ---
-[`cifar10_input.py`](https://github.com/tensorflow/models/tree/master/tutorials/image/cifar10/cifar10_input.py) | Reads the native CIFAR-10 binary file format.
-[`cifar10.py`](https://github.com/tensorflow/models/tree/master/tutorials/image/cifar10/cifar10.py) | Builds the CIFAR-10 model.
-[`cifar10_train.py`](https://github.com/tensorflow/models/tree/master/tutorials/image/cifar10/cifar10_train.py) | Trains a CIFAR-10 model on a CPU or GPU.
-[`cifar10_multi_gpu_train.py`](https://github.com/tensorflow/models/tree/master/tutorials/image/cifar10/cifar10_multi_gpu_train.py) | Trains a CIFAR-10 model on multiple GPUs.
-[`cifar10_eval.py`](https://github.com/tensorflow/models/tree/master/tutorials/image/cifar10/cifar10_eval.py) | Evaluates the predictive performance of a CIFAR-10 model.
+[`cifar10_input.py`](https://www.tensorflow.org/code/tensorflow_models/tutorials/image/cifar10/cifar10_input.py) | Reads the native CIFAR-10 binary file format.
+[`cifar10.py`](https://www.tensorflow.org/code/tensorflow_models/tutorials/image/cifar10/cifar10.py) | Builds the CIFAR-10 model.
+[`cifar10_train.py`](https://www.tensorflow.org/code/tensorflow_models/tutorials/image/cifar10/cifar10_train.py) | Trains a CIFAR-10 model on a CPU or GPU.
+[`cifar10_multi_gpu_train.py`](https://www.tensorflow.org/code/tensorflow_models/tutorials/image/cifar10/cifar10_multi_gpu_train.py) | Trains a CIFAR-10 model on multiple GPUs.
+[`cifar10_eval.py`](https://www.tensorflow.org/code/tensorflow_models/tutorials/image/cifar10/cifar10_eval.py) | Evaluates the predictive performance of a CIFAR-10 model.
## CIFAR-10 Model
The CIFAR-10 network is largely contained in
-[`cifar10.py`](https://github.com/tensorflow/models/tree/master/tutorials/image/cifar10/cifar10.py).
+[`cifar10.py`](https://www.tensorflow.org/code/tensorflow_models/tutorials/image/cifar10/cifar10.py).
The complete training
graph contains roughly 765 operations. We find that we can make the code most
reusable by constructing the graph with the following modules:
diff --git a/tensorflow/docs_src/tutorials/image_retraining.md b/tensorflow/docs_src/tutorials/image_retraining.md
index 52e6980..df15bc0 100644
--- a/tensorflow/docs_src/tutorials/image_retraining.md
+++ b/tensorflow/docs_src/tutorials/image_retraining.md
@@ -390,7 +390,7 @@
python tensorflow/examples/label_image/label_image.py \
--graph=/tmp/output_graph.pb --labels=/tmp/output_labels.txt \
--input_layer=input \
---output_layer=final_result:0 \
+--output_layer=final_result \
--input_height=224 --input_width=224 \
--input_mean=128 --input_std=128 \
--image=$HOME/flower_photos/daisy/21652746_cc379e0eea_m.jpg
diff --git a/tensorflow/docs_src/tutorials/word2vec.md b/tensorflow/docs_src/tutorials/word2vec.md
index 0a1c41c..3fe7352 100644
--- a/tensorflow/docs_src/tutorials/word2vec.md
+++ b/tensorflow/docs_src/tutorials/word2vec.md
@@ -23,7 +23,7 @@
This basic example contains the code needed to download some data, train on it a
bit and visualize the result. Once you get comfortable with reading and running
the basic version, you can graduate to
-[models/tutorials/embedding/word2vec.py](https://github.com/tensorflow/models/tree/master/tutorials/embedding/word2vec.py)
+[models/tutorials/embedding/word2vec.py](https://www.tensorflow.org/code/tensorflow_models/tutorials/embedding/word2vec.py)
which is a more serious implementation that showcases some more advanced
TensorFlow principles about how to efficiently use threads to move data into a
text model, how to checkpoint during training, etc.
@@ -341,7 +341,7 @@
Et voila! As expected, words that are similar end up clustering nearby each
other. For a more heavyweight implementation of word2vec that showcases more of
the advanced features of TensorFlow, see the implementation in
-[models/tutorials/embedding/word2vec.py](https://github.com/tensorflow/models/tree/master/tutorials/embedding/word2vec.py).
+[models/tutorials/embedding/word2vec.py](https://www.tensorflow.org/code/tensorflow_models/tutorials/embedding/word2vec.py).
## Evaluating Embeddings: Analogical Reasoning
@@ -357,7 +357,7 @@
To see how we do this evaluation, have a look at the `build_eval_graph()` and
`eval()` functions in
-[models/tutorials/embedding/word2vec.py](https://github.com/tensorflow/models/tree/master/tutorials/embedding/word2vec.py).
+[models/tutorials/embedding/word2vec.py](https://www.tensorflow.org/code/tensorflow_models/tutorials/embedding/word2vec.py).
The choice of hyperparameters can strongly influence the accuracy on this task.
To achieve state-of-the-art performance on this task requires training over a
@@ -385,13 +385,13 @@
custom data reader for your problem, as described in
@{$new_data_formats$New Data Formats}. For the case of Skip-Gram
modeling, we've actually already done this for you as an example in
-[models/tutorials/embedding/word2vec.py](https://github.com/tensorflow/models/tree/master/tutorials/embedding/word2vec.py).
+[models/tutorials/embedding/word2vec.py](https://www.tensorflow.org/code/tensorflow_models/tutorials/embedding/word2vec.py).
If your model is no longer I/O bound but you want still more performance, you
can take things further by writing your own TensorFlow Ops, as described in
@{$adding_an_op$Adding a New Op}. Again we've provided an
example of this for the Skip-Gram case
-[models/tutorials/embedding/word2vec_optimized.py](https://github.com/tensorflow/models/tree/master/tutorials/embedding/word2vec_optimized.py).
+[models/tutorials/embedding/word2vec_optimized.py](https://www.tensorflow.org/code/tensorflow_models/tutorials/embedding/word2vec_optimized.py).
Feel free to benchmark these against each other to measure performance
improvements at each stage.
diff --git a/tensorflow/java/src/main/java/org/tensorflow/Input.java b/tensorflow/java/src/main/java/org/tensorflow/Input.java
deleted file mode 100644
index 13bc463..0000000
--- a/tensorflow/java/src/main/java/org/tensorflow/Input.java
+++ /dev/null
@@ -1,48 +0,0 @@
-/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-package org.tensorflow;
-
-/**
- * Interface implemented by operands of a TensorFlow operation.
- *
- * <p>Example usage:
- *
- * <pre>{@code
- * // The "decodeJpeg" operation can be used as input to the "cast" operation
- * Input decodeJpeg = ops.image().decodeJpeg(...);
- * ops.math().cast(decodeJpeg, DataType.FLOAT);
- *
- * // The output "y" of the "unique" operation can be used as input to the "cast" operation
- * Output y = ops.array().unique(...).y();
- * ops.math().cast(y, DataType.FLOAT);
- *
- * // The "split" operation can be used as input list to the "concat" operation
- * Iterable<? extends Input> split = ops.array().split(...);
- * ops.array().concat(0, split);
- * }</pre>
- */
-public interface Input<T> {
-
- /**
- * Returns the symbolic handle of a tensor.
- *
- * <p>Inputs to TensorFlow operations are outputs of another TensorFlow operation. This method is
- * used to obtain a symbolic handle that represents the computation of the input.
- *
- * @see OperationBuilder#addInput(Output)
- */
- Output<T> asOutput();
-}
diff --git a/tensorflow/java/src/main/java/org/tensorflow/types/TFBool.java b/tensorflow/java/src/main/java/org/tensorflow/types/TFBool.java
deleted file mode 100644
index ab34f6a..0000000
--- a/tensorflow/java/src/main/java/org/tensorflow/types/TFBool.java
+++ /dev/null
@@ -1,30 +0,0 @@
-/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-// GENERATED FILE. To update, edit tftypes.pl instead.
-
-package org.tensorflow.types;
-
-import org.tensorflow.DataType;
-
-/** Represents a boolean. */
-public class TFBool implements TFType {
- private TFBool() {}
- static {
- Types.typeCodes.put(TFBool.class, DataType.BOOL);
- }
- static {
- Types.scalars.put(TFBool.class, false);
- }
-}
diff --git a/tensorflow/java/src/main/java/org/tensorflow/types/TFDouble.java b/tensorflow/java/src/main/java/org/tensorflow/types/TFDouble.java
deleted file mode 100644
index 49e5d9f..0000000
--- a/tensorflow/java/src/main/java/org/tensorflow/types/TFDouble.java
+++ /dev/null
@@ -1,30 +0,0 @@
-/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-// GENERATED FILE. To update, edit tftypes.pl instead.
-
-package org.tensorflow.types;
-
-import org.tensorflow.DataType;
-
-/** Represents a 64-bit double precision floating point number. */
-public class TFDouble implements TFType {
- private TFDouble() {}
- static {
- Types.typeCodes.put(TFDouble.class, DataType.DOUBLE);
- }
- static {
- Types.scalars.put(TFDouble.class, 0.0);
- }
-}
diff --git a/tensorflow/java/src/main/java/org/tensorflow/types/TFFloat.java b/tensorflow/java/src/main/java/org/tensorflow/types/TFFloat.java
deleted file mode 100644
index 8426ee4..0000000
--- a/tensorflow/java/src/main/java/org/tensorflow/types/TFFloat.java
+++ /dev/null
@@ -1,30 +0,0 @@
-/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-// GENERATED FILE. To update, edit tftypes.pl instead.
-
-package org.tensorflow.types;
-
-import org.tensorflow.DataType;
-
-/** Represents a 32-bit single precision floating point number. */
-public class TFFloat implements TFType {
- private TFFloat() {}
- static {
- Types.typeCodes.put(TFFloat.class, DataType.FLOAT);
- }
- static {
- Types.scalars.put(TFFloat.class, 0f);
- }
-}
diff --git a/tensorflow/java/src/main/java/org/tensorflow/types/TFInt32.java b/tensorflow/java/src/main/java/org/tensorflow/types/TFInt32.java
deleted file mode 100644
index 3947b6a..0000000
--- a/tensorflow/java/src/main/java/org/tensorflow/types/TFInt32.java
+++ /dev/null
@@ -1,30 +0,0 @@
-/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-// GENERATED FILE. To update, edit tftypes.pl instead.
-
-package org.tensorflow.types;
-
-import org.tensorflow.DataType;
-
-/** Represents a 32-bit signed integer. */
-public class TFInt32 implements TFType {
- private TFInt32() {}
- static {
- Types.typeCodes.put(TFInt32.class, DataType.INT32);
- }
- static {
- Types.scalars.put(TFInt32.class, 0);
- }
-}
diff --git a/tensorflow/java/src/main/java/org/tensorflow/types/TFInt64.java b/tensorflow/java/src/main/java/org/tensorflow/types/TFInt64.java
deleted file mode 100644
index ccdded8..0000000
--- a/tensorflow/java/src/main/java/org/tensorflow/types/TFInt64.java
+++ /dev/null
@@ -1,30 +0,0 @@
-/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-// GENERATED FILE. To update, edit tftypes.pl instead.
-
-package org.tensorflow.types;
-
-import org.tensorflow.DataType;
-
-/** Represents a 64-bit signed integer. */
-public class TFInt64 implements TFType {
- private TFInt64() {}
- static {
- Types.typeCodes.put(TFInt64.class, DataType.INT64);
- }
- static {
- Types.scalars.put(TFInt64.class, 0L);
- }
-}
diff --git a/tensorflow/java/src/main/java/org/tensorflow/types/TFString.java b/tensorflow/java/src/main/java/org/tensorflow/types/TFString.java
deleted file mode 100644
index e7327e8..0000000
--- a/tensorflow/java/src/main/java/org/tensorflow/types/TFString.java
+++ /dev/null
@@ -1,27 +0,0 @@
-/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-// GENERATED FILE. To update, edit tftypes.pl instead.
-
-package org.tensorflow.types;
-
-import org.tensorflow.DataType;
-
-/** Represents an arbitrary sequence of bytes. */
-public class TFString implements TFType {
- private TFString() {}
- static {
- Types.typeCodes.put(TFString.class, DataType.STRING);
- }
-}
diff --git a/tensorflow/java/src/main/java/org/tensorflow/types/TFType.java b/tensorflow/java/src/main/java/org/tensorflow/types/TFType.java
deleted file mode 100644
index 562953a..0000000
--- a/tensorflow/java/src/main/java/org/tensorflow/types/TFType.java
+++ /dev/null
@@ -1,20 +0,0 @@
-/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-package org.tensorflow.types;
-
-/**
- * A marker interface for classes representing TensorFlow types.
- */
-public interface TFType {}
diff --git a/tensorflow/java/src/main/java/org/tensorflow/types/TFUInt8.java b/tensorflow/java/src/main/java/org/tensorflow/types/TFUInt8.java
deleted file mode 100644
index d7305ca..0000000
--- a/tensorflow/java/src/main/java/org/tensorflow/types/TFUInt8.java
+++ /dev/null
@@ -1,30 +0,0 @@
-/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-// GENERATED FILE. To update, edit tftypes.pl instead.
-
-package org.tensorflow.types;
-
-import org.tensorflow.DataType;
-
-/** Represents an 8-bit unsigned integer. */
-public class TFUInt8 implements TFType {
- private TFUInt8() {}
- static {
- Types.typeCodes.put(TFUInt8.class, DataType.UINT8);
- }
- static {
- Types.scalars.put(TFUInt8.class, (byte)0);
- }
-}
diff --git a/tensorflow/java/src/main/java/org/tensorflow/types/Types.java b/tensorflow/java/src/main/java/org/tensorflow/types/Types.java
deleted file mode 100644
index 976cd9f..0000000
--- a/tensorflow/java/src/main/java/org/tensorflow/types/Types.java
+++ /dev/null
@@ -1,52 +0,0 @@
-/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-package org.tensorflow.types;
-
-import java.util.HashMap;
-import java.util.Map;
-import org.tensorflow.DataType;
-
-/**
- * Utility class for managing the representation of TensorFlow types as Java
- * types. For each TensorFlow type (e.g., int32), there is a corresponding Java
- * type (e.g., TFInt32) that represents it at compile time and a corresponding
- * class object (e.g., TFInt32.class) that represents it at run time. There is
- * also an enumeration value in DataType that can be used to represent the
- * type, though that should rarely be required.
- */
-public class Types {
-
- private Types() {} // not instantiable
-
- static final Map<Class<?>, DataType> typeCodes = new HashMap<>();
-
- /** Returns the DataType value corresponding to a TensorFlow type class. */
- public static DataType dataType(Class<? extends TFType> c) {
- DataType dtype = typeCodes.get(c);
- if (dtype == null) {
- throw new IllegalArgumentException("" + c + " is not a TensorFlow type.");
- }
- return dtype;
- }
-
- static final Map<Class<?>, Object> scalars = new HashMap<>();
-
- /** Returns the zero value of type described by {@code c}, or null if
- * the type (e.g., string) is not numeric and therefore has no zero value.
- */
- public static Object zeroValue(Class<? extends TFType> c) {
- return scalars.get(c);
- }
-}
diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD
index c62ff10..97467c5 100644
--- a/tensorflow/python/BUILD
+++ b/tensorflow/python/BUILD
@@ -1238,6 +1238,7 @@
srcs = ["framework/dtypes_test.py"],
main = "framework/dtypes_test.py",
srcs_version = "PY2AND3",
+ tags = ["no_windows"],
deps = [
":framework_for_generated_wrappers",
":framework_test_lib",
@@ -3506,6 +3507,7 @@
size = "small",
srcs = ["lib/core/bfloat16_test.py"],
srcs_version = "PY2AND3",
+ tags = ["no_windows"],
deps = [
":client_testlib",
":lib",
diff --git a/tensorflow/python/debug/wrappers/dumping_wrapper.py b/tensorflow/python/debug/wrappers/dumping_wrapper.py
index 962318e..3fac2e5 100644
--- a/tensorflow/python/debug/wrappers/dumping_wrapper.py
+++ b/tensorflow/python/debug/wrappers/dumping_wrapper.py
@@ -73,6 +73,7 @@
self, sess, watch_fn=watch_fn, thread_name_filter=thread_name_filter,
pass_through_operrors=pass_through_operrors)
+ session_root = os.path.expanduser(session_root)
if gfile.Exists(session_root):
if not gfile.IsDirectory(session_root):
raise ValueError(
diff --git a/tensorflow/python/debug/wrappers/local_cli_wrapper.py b/tensorflow/python/debug/wrappers/local_cli_wrapper.py
index c46a4e7..1465cb7 100644
--- a/tensorflow/python/debug/wrappers/local_cli_wrapper.py
+++ b/tensorflow/python/debug/wrappers/local_cli_wrapper.py
@@ -82,6 +82,7 @@
if not dump_root:
self._dump_root = tempfile.mktemp(prefix=_DUMP_ROOT_PREFIX)
else:
+ dump_root = os.path.expanduser(dump_root)
if os.path.isfile(dump_root):
raise ValueError("dump_root path points to a file: %s" % dump_root)
elif os.path.isdir(dump_root) and os.listdir(dump_root):
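With the two `expanduser` calls above, home-relative paths now work for both debug wrappers. A minimal sketch (the dump directory name is hypothetical):

```python
import tensorflow as tf
from tensorflow.python import debug as tf_debug

sess = tf.Session()
# "~/tfdbg_dumps" is now expanded to an absolute path before the
# existence checks, rather than treated as a literal directory name.
sess = tf_debug.DumpingDebugWrapperSession(sess, session_root="~/tfdbg_dumps")
```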
diff --git a/tensorflow/python/eager/backprop.py b/tensorflow/python/eager/backprop.py
index a06feb1..048dc92 100644
--- a/tensorflow/python/eager/backprop.py
+++ b/tensorflow/python/eager/backprop.py
@@ -550,7 +550,7 @@
def val_and_grad_function(f, params=None):
- """Returns a function that computes f and is derivative w.r.t. params.
+ """Returns a function that computes f and its derivative w.r.t. params.
Example:
```python
diff --git a/tensorflow/python/estimator/training_test.py b/tensorflow/python/estimator/training_test.py
index 2d3f5d6..4f7da84 100644
--- a/tensorflow/python/estimator/training_test.py
+++ b/tensorflow/python/estimator/training_test.py
@@ -326,7 +326,7 @@
mock_executor.assert_called_with(estimator=mock_est,
train_spec=mock_train_spec,
eval_spec=mock_eval_spec)
- mock_executor_instance.run.assert_called()
+ self.assertTrue(mock_executor_instance.run.called)
def test_error_out_if_evaluator_task_id_is_non_zero(self):
tf_config = {
diff --git a/tensorflow/python/ops/gradient_checker.py b/tensorflow/python/ops/gradient_checker.py
index 1ff1968..65cc6ff 100644
--- a/tensorflow/python/ops/gradient_checker.py
+++ b/tensorflow/python/ops/gradient_checker.py
@@ -181,7 +181,7 @@
def _compute_dx_and_dy(x, y, y_shape):
- """Returns a node to compute gradient of x wrt y."""
+ """Returns a node to compute gradient of y wrt x."""
# We make up a dy so that we can compute the gradients. We don't really use
# the value of dy -- we will always feed it. We need to add an identity node
# so that we can always feed it properly. Otherwise, for the Add operation,
@@ -189,7 +189,7 @@
with x.graph.as_default():
dy_orig = constant_op.constant(1.0, shape=y_shape, dtype=y.dtype)
dy = array_ops.identity(dy_orig)
- # We compute the gradients for x wrt. y
+ # We compute the gradients for y wrt. x
grads = gradients.gradients(y, x, dy)
assert len(grads) == 1
return grads[0], dy_orig
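For reference, the public entry point built on this helper compares the analytic and numeric gradients of y w.r.t. x; a small sketch:

```python
import tensorflow as tf

x = tf.placeholder(tf.float32, shape=[1, 2])
y = tf.square(x)

with tf.Session():
    # Maximum elementwise difference between the analytic and numeric
    # Jacobians of y w.r.t. x; near zero for a correct gradient.
    err = tf.test.compute_gradient_error(x, [1, 2], y, [1, 2])
    print(err)
```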
diff --git a/tensorflow/python/ops/image_ops_impl.py b/tensorflow/python/ops/image_ops_impl.py
index 7f494db..9bebffd 100644
--- a/tensorflow/python/ops/image_ops_impl.py
+++ b/tensorflow/python/ops/image_ops_impl.py
@@ -999,8 +999,8 @@
Args:
image : A Tensor.
- gamma : A scalar. Non negative real number.
- gain : A scalar. The constant multiplier.
+ gamma : A scalar or tensor. Non-negative real number.
+ gain : A scalar or tensor. The constant multiplier.
Returns:
A Tensor. Gamma corrected output image.
@@ -1019,17 +1019,20 @@
"""
with ops.op_scope([image, gamma, gain], None, 'adjust_gamma'):
- # Convert pixel value to DT_FLOAT for computing adjusted image
+ # Convert pixel value to DT_FLOAT for computing adjusted image.
img = ops.convert_to_tensor(image, name='img', dtype=dtypes.float32)
- # Keep image dtype for computing the scale of corresponding dtype
+ # Keep image dtype for computing the scale of corresponding dtype.
image = ops.convert_to_tensor(image, name='image')
- if gamma < 0:
- raise ValueError('Gamma should be a non-negative real number')
- # scale = max(dtype) - min(dtype)
+ assert_op = _assert(gamma >= 0, ValueError,
+ 'Gamma should be a non-negative real number.')
+ if assert_op:
+ gamma = control_flow_ops.with_dependencies(assert_op, gamma)
+
+ # scale = max(dtype) - min(dtype).
scale = constant_op.constant(image.dtype.limits[1] - image.dtype.limits[0],
dtype=dtypes.float32)
- # According to the definition of gamma correction
+ # According to the definition of gamma correction.
adjusted_img = (img / scale) ** gamma * scale * gain
return adjusted_img
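With the runtime assert above, `gamma` may now be a tensor rather than only a Python scalar; a minimal sketch:

```python
import tensorflow as tf

image = tf.constant([[0.0, 64.0], [128.0, 255.0]])
gamma = tf.constant(0.5)  # previously only a Python scalar was validated

adjusted = tf.image.adjust_gamma(image, gamma=gamma, gain=1.0)
with tf.Session() as sess:
    # A negative gamma now fails when the graph runs, not at build time.
    print(sess.run(adjusted))
```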
diff --git a/tensorflow/python/ops/image_ops_test.py b/tensorflow/python/ops/image_ops_test.py
index 3d73b77..3a49d41 100644
--- a/tensorflow/python/ops/image_ops_test.py
+++ b/tensorflow/python/ops/image_ops_test.py
@@ -189,6 +189,44 @@
self.assertAllClose(y_tf, y_np, 1e-6)
+ def test_adjust_gamma_less_zero(self):
+ """White image should be returned for gamma equal to zero"""
+ with self.test_session():
+ x_data = np.random.uniform(0, 255, (8, 8))
+ x_np = np.array(x_data, dtype=np.float32)
+
+ x = constant_op.constant(x_np, shape=x_np.shape)
+
+ err_msg = 'Gamma should be a non-negative real number.'
+
+ try:
+ image_ops.adjust_gamma(x, gamma=-1)
+ except Exception as e:
+ if err_msg not in str(e):
+ raise
+ else:
+ raise AssertionError("Exception not raised: %s" % err_msg)
+
+ def test_adjust_gamma_less_zero_tensor(self):
+ """White image should be returned for gamma equal to zero"""
+ with self.test_session():
+ x_data = np.random.uniform(0, 255, (8, 8))
+ x_np = np.array(x_data, dtype=np.float32)
+
+ x = constant_op.constant(x_np, shape=x_np.shape)
+ y = constant_op.constant(-1.0, dtype=dtypes.float32)
+
+ image = image_ops.adjust_gamma(x, gamma=y)
+
+ err_msg = 'Gamma should be a non-negative real number.'
+ try:
+ image.eval()
+ except Exception as e:
+ if err_msg not in str(e):
+ raise
+ else:
+ raise AssertionError("Exception not raised: %s" % err_msg)
+
def test_adjust_gamma_zero(self):
"""White image should be returned for gamma equal to zero"""
with self.test_session():
diff --git a/tensorflow/python/ops/resource_variable_ops.py b/tensorflow/python/ops/resource_variable_ops.py
index 60a32b1..879c206 100644
--- a/tensorflow/python/ops/resource_variable_ops.py
+++ b/tensorflow/python/ops/resource_variable_ops.py
@@ -886,11 +886,6 @@
# Build appropriately shaped IndexedSlices
handle = op.inputs[0]
indices = op.inputs[1]
- if context.in_graph_mode():
- # Walk graph back until the original handle is found.
- # TODO(apassos): implement this for EAGER mode.
- while handle.op.type != "VarHandleOp":
- handle = handle.op.inputs[0]
params_shape = gen_resource_variable_ops.variable_shape(handle)
size = array_ops.expand_dims(array_ops.size(indices), 0)
values_shape = array_ops.concat([size, params_shape[1:]], 0)
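The simplification above sits on the gradient path for gathers from resource variables; a sketch of a graph that exercises it:

```python
import tensorflow as tf

v = tf.get_variable("v", initializer=[1.0, 2.0, 3.0], use_resource=True)
g = tf.gather(v, [0, 2])
# The gradient is an IndexedSlices whose dense shape now comes directly
# from variable_shape(handle), with no walk back to the VarHandleOp.
grads = tf.gradients(g, v)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print(sess.run(grads))
```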
diff --git a/tensorflow/tools/ci_build/install/install_python3.6_pip_packages.sh b/tensorflow/tools/ci_build/install/install_python3.6_pip_packages.sh
index 6e846ef..bfaa044 100755
--- a/tensorflow/tools/ci_build/install/install_python3.6_pip_packages.sh
+++ b/tensorflow/tools/ci_build/install/install_python3.6_pip_packages.sh
@@ -47,8 +47,6 @@
./configure
make altinstall
-pip3.6 -V
-which pip3.6
ln -s /usr/local/bin/pip3.6 /usr/local/bin/pip3
pip3 install --upgrade virtualenv
@@ -73,7 +71,7 @@
pip3 install scipy==0.18.1
-pip3 install scikit-learn==0.18.1
+pip3 install scikit-learn==0.19.1
# pandas required by `inflow`
pip3 install pandas==0.19.2
diff --git a/tensorflow/tools/compatibility/ast_edits.py b/tensorflow/tools/compatibility/ast_edits.py
deleted file mode 100644
index e7e4c91..0000000
--- a/tensorflow/tools/compatibility/ast_edits.py
+++ /dev/null
@@ -1,497 +0,0 @@
-# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Upgrader for Python scripts according to an API change specification."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import ast
-import collections
-import os
-import shutil
-import sys
-import tempfile
-import traceback
-
-
-class APIChangeSpec(object):
- """This class defines the transformations that need to happen.
-
- This class must provide the following fields:
-
- * `function_keyword_renames`: maps function names to a map of old -> new
- argument names
- * `function_renames`: maps function names to new function names
- * `change_to_function`: a set of function names that have changed (for
- notifications)
- * `function_reorders`: maps functions whose argument order has changed to the
- list of arguments in the new order
- * `function_handle`: maps function names to custom handlers for the function
-
- For an example, see `TFAPIChangeSpec`.
- """
-
-
-class _FileEditTuple(collections.namedtuple(
- "_FileEditTuple", ["comment", "line", "start", "old", "new"])):
- """Each edit that is recorded by a _FileEditRecorder.
-
- Fields:
- comment: A description of the edit and why it was made.
- line: The line number in the file where the edit occurs (1-indexed).
- start: The line number in the file where the edit occurs (0-indexed).
- old: text string to remove (this must match what was in file).
- new: text string to add in place of `old`.
- """
-
- __slots__ = ()
-
-
-class _FileEditRecorder(object):
- """Record changes that need to be done to the file."""
-
- def __init__(self, filename):
- # all edits are lists of chars
- self._filename = filename
-
- self._line_to_edit = collections.defaultdict(list)
- self._errors = []
-
- def process(self, text):
- """Process a list of strings, each corresponding to the recorded changes.
-
- Args:
- text: A list of lines of text (assumed to contain newlines)
- Returns:
- A tuple of the modified text and a textual description of what is done.
- Raises:
- ValueError: if substitution source location does not have expected text.
- """
-
- change_report = ""
-
- # Iterate of each line
- for line, edits in self._line_to_edit.items():
- offset = 0
- # sort by column so that edits are processed in order in order to make
- # indexing adjustments cumulative for changes that change the string
- # length
- edits.sort(key=lambda x: x.start)
-
- # Extract each line to a list of characters, because mutable lists
- # are editable, unlike immutable strings.
- char_array = list(text[line - 1])
-
- # Record a description of the change
- change_report += "%r Line %d\n" % (self._filename, line)
- change_report += "-" * 80 + "\n\n"
- for e in edits:
- change_report += "%s\n" % e.comment
- change_report += "\n Old: %s" % (text[line - 1])
-
- # Make underscore buffers for underlining where in the line the edit was
- change_list = [" "] * len(text[line - 1])
- change_list_new = [" "] * len(text[line - 1])
-
- # Iterate for each edit
- for e in edits:
- # Create effective start, end by accounting for change in length due
- # to previous edits
- start_eff = e.start + offset
- end_eff = start_eff + len(e.old)
-
- # Make sure the edit is changing what it should be changing
- old_actual = "".join(char_array[start_eff:end_eff])
- if old_actual != e.old:
- raise ValueError("Expected text %r but got %r" %
- ("".join(e.old), "".join(old_actual)))
- # Make the edit
- char_array[start_eff:end_eff] = list(e.new)
-
- # Create the underline highlighting of the before and after
- change_list[e.start:e.start + len(e.old)] = "~" * len(e.old)
- change_list_new[start_eff:end_eff] = "~" * len(e.new)
-
- # Keep track of how to generate effective ranges
- offset += len(e.new) - len(e.old)
-
- # Finish the report comment
- change_report += " %s\n" % "".join(change_list)
- text[line - 1] = "".join(char_array)
- change_report += " New: %s" % (text[line - 1])
- change_report += " %s\n\n" % "".join(change_list_new)
- return "".join(text), change_report, self._errors
-
- def add(self, comment, line, start, old, new, error=None):
- """Add a new change that is needed.
-
- Args:
- comment: A description of what was changed.
- line: Line number (1-indexed).
- start: Column offset (0-indexed).
- old: Old text to replace.
- new: New text to insert.
- error: If set, this edit cannot be fixed automatically; the message is
- recorded in the error list.
- Returns:
- None
- """
-
- self._line_to_edit[line].append(
- _FileEditTuple(comment, line, start, old, new))
- if error:
- self._errors.append("%s:%d: %s" % (self._filename, line, error))
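-
- # Example (hypothetical values): renaming the keyword `n` to `num` at
- # line 3, column 10 of the file would be recorded as:
- #
- #   recorder = _FileEditRecorder("example.py")
- #   recorder.add("Renamed keyword 'n' to 'num'", 3, 10, "n=", "num=")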
-
-
-class _ASTCallVisitor(ast.NodeVisitor):
- """AST Visitor that processes function calls.
-
- Updates function calls from old API version to new API version using a given
- change spec.
- """
-
- def __init__(self, filename, lines, api_change_spec):
- self._filename = filename
- self._file_edit = _FileEditRecorder(filename)
- self._lines = lines
- self._api_change_spec = api_change_spec
-
- def process(self, lines):
- return self._file_edit.process(lines)
-
- def generic_visit(self, node):
- ast.NodeVisitor.generic_visit(self, node)
-
- def _rename_functions(self, node, full_name):
- function_renames = self._api_change_spec.function_renames
- try:
- new_name = function_renames[full_name]
- self._file_edit.add("Renamed function %r to %r" % (full_name,
- new_name),
- node.lineno, node.col_offset, full_name, new_name)
- except KeyError:
- pass
-
- def _get_attribute_full_path(self, node):
- """Traverse an attribute to generate a full name e.g. tf.foo.bar.
-
- Args:
- node: A Node of type Attribute.
-
- Returns:
- A '.'-delimited full name, or None if the tree was not in a simple form,
- e.g. `(foo() + b).bar` returns None, while `a.b.c` returns "a.b.c".
- """
- curr = node
- items = []
- while not isinstance(curr, ast.Name):
- if not isinstance(curr, ast.Attribute):
- return None
- items.append(curr.attr)
- curr = curr.value
- items.append(curr.id)
- return ".".join(reversed(items))
-
- def _find_true_position(self, node):
- """Return correct line number and column offset for a given node.
-
- This is necessary mainly because ast.ListComp reports the location of
- the first token after the opening '[' of the list comprehension.
-
- Args:
- node: Node for which we wish to know the lineno and col_offset.
-
- Returns:
- A (lineno, col_offset) tuple, or (None, None) if the true position
- could not be determined.
- """
- import re
- find_open = re.compile(r"^\s*(\[).*$")
- find_string_chars = re.compile(r"['\"]")
-
- if isinstance(node, ast.ListComp):
- # Strangely, ast.ListComp returns the col_offset of the first token
- # after the '[' token, which appears to be a bug. Work around it by
- # explicitly finding the real start of the list comprehension.
- line = node.lineno
- col = node.col_offset
- # loop over lines
- while True:
- # Reverse the text preceding col and search it with a regular expression
- text = self._lines[line - 1]
- reversed_preceding_text = text[:col][::-1]
- # First check whether a '[' appears with only whitespace between it
- # and col.
- m = find_open.match(reversed_preceding_text)
- if m:
- new_col_offset = col - m.start(1) - 1
- return line, new_col_offset
- else:
- if (reversed_preceding_text == "" or
- reversed_preceding_text.isspace()):
- line = line - 1
- prev_line = self._lines[line - 1]
- # TODO(aselle):
- # this is poor comment detection, but it is good enough for
- # cases where the comment does not contain string literal starting/
- # ending characters. If ast gave us start and end locations of the
- # ast nodes rather than just start, we could use string literal
- # node ranges to filter out spurious #'s that appear in string
- # literals.
- comment_start = prev_line.find("#")
- if comment_start == -1:
- col = len(prev_line) - 1
- elif find_string_chars.search(prev_line[comment_start:]) is None:
- col = comment_start
- else:
- return None, None
- else:
- return None, None
- # Most other node types report proper locations (`with` notably does
- # not), but a `with` statement cannot appear in an argument position
- # anyway.
- return node.lineno, node.col_offset
-
- def visit_Call(self, node): # pylint: disable=invalid-name
- """Handle visiting a call node in the AST.
-
- Args:
- node: Current Node
- """
-
- # Find a simple attribute name path e.g. "tf.foo.bar"
- full_name = self._get_attribute_full_path(node.func)
-
- # Make sure the func is marked as being part of a call
- node.func.is_function_for_call = True
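- # (visit_Attribute checks this flag so that a name that is being called
- # is not also rewritten by the bare-attribute `change_to_function` pass.)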
-
- if full_name:
- # Call special handlers
- function_handles = self._api_change_spec.function_handle
- if full_name in function_handles:
- function_handles[full_name](self._file_edit, node)
-
- # Examine any non-keyword argument and make it into a keyword argument
- # if reordering is required.
- function_reorders = self._api_change_spec.function_reorders
- function_keyword_renames = (
- self._api_change_spec.function_keyword_renames)
-
- if full_name in function_reorders:
- reordered = function_reorders[full_name]
- for idx, arg in enumerate(node.args):
- lineno, col_offset = self._find_true_position(arg)
- if lineno is None or col_offset is None:
- self._file_edit.add(
- "Failed to add keyword %r to reordered function %r"
- % (reordered[idx], full_name), arg.lineno, arg.col_offset,
- "", "",
- error="A necessary keyword argument failed to be inserted.")
- else:
- keyword_arg = reordered[idx]
- if (full_name in function_keyword_renames and
- keyword_arg in function_keyword_renames[full_name]):
- keyword_arg = function_keyword_renames[full_name][keyword_arg]
- self._file_edit.add("Added keyword %r to reordered function %r"
- % (reordered[idx], full_name), lineno,
- col_offset, "", keyword_arg + "=")
-
- # Examine each keyword argument and convert it to the final renamed form
- renamed_keywords = function_keyword_renames.get(full_name, {})
- for keyword in node.keywords:
- argkey = keyword.arg
- argval = keyword.value
-
- if argkey in renamed_keywords:
- argval_lineno, argval_col_offset = self._find_true_position(argval)
- if argval_lineno is not None and argval_col_offset is not None:
- # TODO(aselle): We should scan backward to find the start of the
- # keyword key. Unfortunately ast does not give you the location of
- # keyword keys, so we are forced to infer it from the keyword arg
- # value.
- key_start = argval_col_offset - len(argkey) - 1
- key_end = key_start + len(argkey) + 1
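- # e.g. for `f(name=value)` the value starts right after "name=", so
- # the keyword key begins len("name") + 1 characters before the value.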
- if (self._lines[argval_lineno - 1][key_start:key_end] ==
- argkey + "="):
- self._file_edit.add("Renamed keyword argument from %r to %r" %
- (argkey, renamed_keywords[argkey]),
- argval_lineno,
- argval_col_offset - len(argkey) - 1,
- argkey + "=", renamed_keywords[argkey] + "=")
- continue
- self._file_edit.add(
- "Failed to rename keyword argument from %r to %r" %
- (argkey, renamed_keywords[argkey]),
- argval.lineno,
- argval.col_offset - len(argkey) - 1,
- "", "",
- error="Failed to find keyword lexographically. Fix manually.")
-
- ast.NodeVisitor.generic_visit(self, node)
-
- def visit_Attribute(self, node): # pylint: disable=invalid-name
- """Handle bare Attributes i.e. [tf.foo, tf.bar].
-
- Args:
- node: Node that is of type ast.Attribute
- """
- full_name = self._get_attribute_full_path(node)
- if full_name:
- self._rename_functions(node, full_name)
- if full_name in self._api_change_spec.change_to_function:
- if not hasattr(node, "is_function_for_call"):
- new_text = full_name + "()"
- self._file_edit.add("Changed %r to %r"%(full_name, new_text),
- node.lineno, node.col_offset, full_name, new_text)
-
- ast.NodeVisitor.generic_visit(self, node)
-
-
-class ASTCodeUpgrader(object):
- """Handles upgrading a set of Python files using a given API change spec."""
-
- def __init__(self, api_change_spec):
- if not isinstance(api_change_spec, APIChangeSpec):
- raise TypeError("Must pass APIChangeSpec to ASTCodeUpgrader, got %s" %
- type(api_change_spec))
- self._api_change_spec = api_change_spec
-
- def process_file(self, in_filename, out_filename):
- """Process the given python file for incompatible changes.
-
- Args:
- in_filename: filename to parse
- out_filename: output file to write to
- Returns:
- A tuple of (number of files processed, log of actions, list of errors).
- """
-
- # Write to a temporary file, just in case we are doing an in-place modify.
- with open(in_filename, "r") as in_file, \
- tempfile.NamedTemporaryFile("w", delete=False) as temp_file:
- ret = self.process_opened_file(
- in_filename, in_file, out_filename, temp_file)
-
- shutil.move(temp_file.name, out_filename)
- return ret
-
- # Broad exceptions are required here because ast throws whatever it wants.
- # pylint: disable=broad-except
- def process_opened_file(self, in_filename, in_file, out_filename, out_file):
- """Process the given python file for incompatible changes.
-
- This function is split out to facilitate StringIO testing from
- tf_upgrade_test.py.
-
- Args:
- in_filename: filename to parse
- in_file: opened file (or StringIO)
- out_filename: output file to write to
- out_file: opened file (or StringIO)
- Returns:
- A tuple of (number of files processed, log of actions, list of errors).
- """
- process_errors = []
- text = "-" * 80 + "\n"
- text += "Processing file %r\n outputting to %r\n" % (in_filename,
- out_filename)
- text += "-" * 80 + "\n\n"
-
- parsed_ast = None
- lines = in_file.readlines()
- try:
- parsed_ast = ast.parse("".join(lines))
- except Exception:
- text += "Failed to parse %r\n\n" % in_filename
- text += traceback.format_exc()
- if parsed_ast:
- visitor = _ASTCallVisitor(in_filename, lines, self._api_change_spec)
- visitor.visit(parsed_ast)
- out_text, new_text, process_errors = visitor.process(lines)
- text += new_text
- if out_file:
- out_file.write(out_text)
- text += "\n"
- return 1, text, process_errors
- # pylint: enable=broad-except
-
- def process_tree(self, root_directory, output_root_directory,
- copy_other_files):
- """Processes upgrades on an entire tree of python files in place.
-
- Note that only Python files are processed; if you have custom code in
- other languages, you will need to upgrade it manually.
-
- Args:
- root_directory: Directory to walk and process.
- output_root_directory: Directory to use as the base for all output files.
- copy_other_files: Copy files that are not touched by this converter.
-
- Returns:
- A tuple of the number of files processed, the report string for all
- files, and a list of errors.
- """
-
- # make sure output directory doesn't exist
- if output_root_directory and os.path.exists(output_root_directory):
- print("Output directory %r must not already exist." % (
- output_root_directory))
- sys.exit(1)
-
- # make sure output directory does not overlap with root_directory
- norm_root = os.path.split(os.path.normpath(root_directory))
- norm_output = os.path.split(os.path.normpath(output_root_directory))
- if norm_root == norm_output:
- print("Output directory %r same as input directory %r" % (
- root_directory, output_root_directory))
- sys.exit(1)
-
- # Collect the list of files to process first (this correctly handles the
- # case where the user puts the output directory inside the input dir)
- files_to_process = []
- files_to_copy = []
- for dir_name, _, file_list in os.walk(root_directory):
- py_files = [f for f in file_list if f.endswith(".py")]
- copy_files = [f for f in file_list if not f.endswith(".py")]
- for filename in py_files:
- fullpath = os.path.join(dir_name, filename)
- fullpath_output = os.path.join(
- output_root_directory, os.path.relpath(fullpath, root_directory))
- files_to_process.append((fullpath, fullpath_output))
- if copy_other_files:
- for filename in copy_files:
- fullpath = os.path.join(dir_name, filename)
- fullpath_output = os.path.join(
- output_root_directory, os.path.relpath(fullpath, root_directory))
- files_to_copy.append((fullpath, fullpath_output))
-
- file_count = 0
- tree_errors = []
- report = ""
- report += ("=" * 80) + "\n"
- report += "Input tree: %r\n" % root_directory
- report += ("=" * 80) + "\n"
-
- for input_path, output_path in files_to_process:
- output_directory = os.path.dirname(output_path)
- if not os.path.isdir(output_directory):
- os.makedirs(output_directory)
- file_count += 1
- _, l_report, l_errors = self.process_file(input_path, output_path)
- tree_errors += l_errors
- report += l_report
- for input_path, output_path in files_to_copy:
- output_directory = os.path.dirname(output_path)
- if not os.path.isdir(output_directory):
- os.makedirs(output_directory)
- shutil.copy(input_path, output_path)
- return file_count, report, tree_errors
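-
-# A typical driver, sketched with the hypothetical spec from above:
-#
-#   upgrader = ASTCodeUpgrader(MyAPIChangeSpec())
-#   count, report, errors = upgrader.process_file("old.py", "new.py")
-#   # or, for an entire tree:
-#   count, report, errors = upgrader.process_tree("src/", "out/", True)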
diff --git a/tensorflow/tools/docker/Dockerfile.devel b/tensorflow/tools/docker/Dockerfile.devel
index cd22f18..5dc4a05 100644
--- a/tensorflow/tools/docker/Dockerfile.devel
+++ b/tensorflow/tools/docker/Dockerfile.devel
@@ -70,7 +70,7 @@
# Download and build TensorFlow.
WORKDIR /tensorflow
-RUN git clone --branch=r1.4 --depth=1 https://github.com/tensorflow/tensorflow.git .
+RUN git clone --branch=r1.5 --depth=1 https://github.com/tensorflow/tensorflow.git .
# TODO(craigcitro): Don't install the pip package, since it makes it
# more difficult to experiment with local changes. Instead, just add
diff --git a/tensorflow/tools/docker/Dockerfile.devel-cpu-mkl b/tensorflow/tools/docker/Dockerfile.devel-cpu-mkl
index 8180e5e..3c15fc9 100644
--- a/tensorflow/tools/docker/Dockerfile.devel-cpu-mkl
+++ b/tensorflow/tools/docker/Dockerfile.devel-cpu-mkl
@@ -3,7 +3,7 @@
LABEL maintainer="Clayne Robison<clayne.b.robison@intel.com>"
# These arguments are parameterized. Use --build-args to override.
-ARG TF_BRANCH=r1.4
+ARG TF_BRANCH=r1.5
ARG WHL_DIR=/whl
RUN apt-get update && apt-get install -y --no-install-recommends \
diff --git a/tensorflow/tools/docker/Dockerfile.devel-gpu b/tensorflow/tools/docker/Dockerfile.devel-gpu
index d0c540ae..07ffd38 100644
--- a/tensorflow/tools/docker/Dockerfile.devel-gpu
+++ b/tensorflow/tools/docker/Dockerfile.devel-gpu
@@ -79,7 +79,7 @@
# Download and build TensorFlow.
WORKDIR /tensorflow
-RUN git clone --branch=r1.4 --depth=1 https://github.com/tensorflow/tensorflow.git .
+RUN git clone --branch=r1.5 --depth=1 https://github.com/tensorflow/tensorflow.git .
# Configure the build for our CUDA configuration.
ENV CI_BUILD_PYTHON python
diff --git a/tensorflow/tools/docker/Dockerfile.devel-gpu-cuda9-cudnn7 b/tensorflow/tools/docker/Dockerfile.devel-gpu-cuda9-cudnn7
deleted file mode 100644
index 3bedc8c..0000000
--- a/tensorflow/tools/docker/Dockerfile.devel-gpu-cuda9-cudnn7
+++ /dev/null
@@ -1,115 +0,0 @@
-FROM nvidia/cuda:9.0-cudnn7-devel-ubuntu16.04
-
-LABEL maintainer="Gunhan Gulsoy <gunan@google.com>"
-
-# It is possible to override these for releases.
-ARG TF_BRANCH=master
-ARG BAZEL_VERSION=0.5.4
-ARG TF_AVAILABLE_CPUS=32
-
-RUN apt-get update && apt-get install -y --no-install-recommends \
- build-essential \
- curl \
- git \
- golang \
- libcurl3-dev \
- libfreetype6-dev \
- libpng12-dev \
- libzmq3-dev \
- pkg-config \
- python-dev \
- python-pip \
- rsync \
- software-properties-common \
- unzip \
- zip \
- zlib1g-dev \
- openjdk-8-jdk \
- openjdk-8-jre-headless \
- wget \
- && \
- apt-get clean && \
- rm -rf /var/lib/apt/lists/*
-
-RUN pip --no-cache-dir install --upgrade \
- pip setuptools
-
-RUN pip --no-cache-dir install \
- ipykernel \
- jupyter \
- matplotlib \
- numpy \
- scipy \
- sklearn \
- pandas \
- wheel \
- && \
- python -m ipykernel.kernelspec
-
-# Set up our notebook config.
-COPY jupyter_notebook_config.py /root/.jupyter/
-
-# Jupyter has issues with being run directly:
-# https://github.com/ipython/ipython/issues/7062
-# We just add a little wrapper script.
-COPY run_jupyter.sh /
-
-# Set up Bazel.
-
-# Running bazel inside a `docker build` command causes trouble, cf:
-# https://github.com/bazelbuild/bazel/issues/134
-# The easiest solution is to set up a bazelrc file forcing --batch.
-RUN echo "startup --batch" >>/etc/bazel.bazelrc
-# Similarly, we need to workaround sandboxing issues:
-# https://github.com/bazelbuild/bazel/issues/418
-RUN echo "build --spawn_strategy=standalone --genrule_strategy=standalone" \
- >>/etc/bazel.bazelrc
-WORKDIR /
-RUN mkdir /bazel && \
- cd /bazel && \
- wget --quiet https://github.com/bazelbuild/bazel/releases/download/$BAZEL_VERSION/bazel-$BAZEL_VERSION-installer-linux-x86_64.sh && \
- wget --quiet https://raw.githubusercontent.com/bazelbuild/bazel/master/LICENSE && \
- chmod +x bazel-*.sh && \
- ./bazel-$BAZEL_VERSION-installer-linux-x86_64.sh && \
- rm -f /bazel/bazel-$BAZEL_VERSION-installer-linux-x86_64.sh
-
-# Download and build TensorFlow.
-WORKDIR /
-RUN git clone https://github.com/tensorflow/tensorflow.git && \
- cd tensorflow && \
- git checkout ${TF_BRANCH}
-WORKDIR /tensorflow
-
-# Configure the build for our CUDA configuration.
-ENV CI_BUILD_PYTHON=python \
- LD_LIBRARY_PATH=/usr/local/cuda/extras/CUPTI/lib64:${LD_LIBRARY_PATH} \
- CUDNN_INSTALL_PATH=/usr/lib/x86_64-linux-gnu \
- PYTHON_BIN_PATH=/usr/bin/python \
- PYTHON_LIB_PATH=/usr/local/lib/python2.7/dist-packages \
- TF_NEED_CUDA=1 \
- TF_CUDA_VERSION=9.0 \
- TF_CUDA_COMPUTE_CAPABILITIES=3.0,3.5,5.2,6.0,6.1,7.0 \
- TF_CUDNN_VERSION=7
-RUN ./configure
-
-# Build and Install TensorFlow.
-RUN ln -s /usr/local/cuda/lib64/stubs/libcuda.so /usr/local/cuda/lib64/stubs/libcuda.so.1 && \
- LD_LIBRARY_PATH=/usr/local/cuda/lib64/stubs:${LD_LIBRARY_PATH} \
- bazel build -c opt \
- --config=cuda \
- --cxxopt="-D_GLIBCXX_USE_CXX11_ABI=0" \
- --jobs=${TF_AVAILABLE_CPUS} \
- tensorflow/tools/pip_package:build_pip_package && \
- mkdir /pip_pkg && \
- bazel-bin/tensorflow/tools/pip_package/build_pip_package /pip_pkg && \
- pip --no-cache-dir install --upgrade /pip_pkg/tensorflow-*.whl && \
- rm -rf /pip_pkg && \
- rm -rf /root/.cache
-# Clean up pip wheel and Bazel cache when done.
-
-WORKDIR /root
-
-# TensorBoard
-EXPOSE 6006
-# IPython
-EXPOSE 8888
diff --git a/tensorflow/tools/git/gen_git_source.py b/tensorflow/tools/git/gen_git_source.py
index f2845c8..3630dbd 100755
--- a/tensorflow/tools/git/gen_git_source.py
+++ b/tensorflow/tools/git/gen_git_source.py
@@ -16,7 +16,10 @@
"""Help include git hash in tensorflow bazel build.
This creates symlinks from the internal git repository directory so
-that the build system can see changes in the version state.
+that the build system can see changes in the version state. We also
+remember which branch git was on so that, when the branch changes, we
+can detect that the ref file is no longer correct (and suggest that
+users run ./configure again).
NOTE: this script is only used in opensource.
@@ -218,14 +221,13 @@
if not data["git"]:
git_version = b"unknown"
else:
- old_branch = data["branch"]
+ old_branch = data["branch"]
new_branch = parse_branch_ref(head_symlink)
if new_branch != old_branch:
- print("Warning, run ./configure again, to get __git_version__ to record "
- "correct version")
- git_version = get_git_version(data["path"])+'-inconsistent-git-version'
- else:
- git_version = get_git_version(data["path"])
+ raise RuntimeError(
+ "Run ./configure again, branch was '%s' but is now '%s'" %
+ (old_branch, new_branch))
+ git_version = get_git_version(data["path"])
write_version_info(dest_file, git_version)
diff --git a/tensorflow/tools/pip_package/BUILD b/tensorflow/tools/pip_package/BUILD
index 72116f7..c32461d 100644
--- a/tensorflow/tools/pip_package/BUILD
+++ b/tensorflow/tools/pip_package/BUILD
@@ -154,6 +154,7 @@
"//tensorflow:tensorflow_py",
"//tensorflow/contrib/boosted_trees:boosted_trees_pip",
"//tensorflow/contrib/cluster_resolver:cluster_resolver_pip",
+ "//tensorflow/contrib/data/python/kernel_tests:dataset_serialization_test",
"//tensorflow/contrib/data/python/ops:prefetching_py",
"//tensorflow/contrib/eager/python/examples:examples_pip",
"//tensorflow/contrib/eager/python:checkpointable",
diff --git a/tensorflow/tools/pip_package/setup.py b/tensorflow/tools/pip_package/setup.py
index e03faee..2e31d6e 100644
--- a/tensorflow/tools/pip_package/setup.py
+++ b/tensorflow/tools/pip_package/setup.py
@@ -29,7 +29,7 @@
# This version string is semver compatible, but incompatible with pip.
# For pip, we will remove all '-' characters from this string, and use the
# result for pip.
-_VERSION = '1.4.0'
+_VERSION = '1.5.0-rc0'
REQUIRED_PACKAGES = [
'absl-py >= 0.1.6',
diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl
index 4f50d9d..6ad42d9 100644
--- a/tensorflow/workspace.bzl
+++ b/tensorflow/workspace.bzl
@@ -97,11 +97,11 @@
tf_http_archive(
name = "eigen_archive",
urls = [
- "https://mirror.bazel.build/bitbucket.org/eigen/eigen/get/c2947c341c68.tar.gz",
- "https://bitbucket.org/eigen/eigen/get/c2947c341c68.tar.gz",
+ "https://mirror.bazel.build/bitbucket.org/eigen/eigen/get/034b6c3e1017.tar.gz",
+ "https://bitbucket.org/eigen/eigen/get/034b6c3e1017.tar.gz",
],
- sha256 = "f21f8ab8a8dbcb91cd0deeade19a043f47708d0da7a4000164cdf203b4a71e34",
- strip_prefix = "eigen-eigen-c2947c341c68",
+ sha256 = "0a8ac1e83ef9c26c0e362bd7968650b710ce54e2d883f0df84e5e45a3abe842a",
+ strip_prefix = "eigen-eigen-034b6c3e1017",
build_file = str(Label("//third_party:eigen.BUILD")),
)
diff --git a/third_party/boringssl/add_boringssl_s390x.patch b/third_party/boringssl/add_boringssl_s390x.patch
deleted file mode 100644
index 8b42d10..0000000
--- a/third_party/boringssl/add_boringssl_s390x.patch
+++ /dev/null
@@ -1,133 +0,0 @@
-diff --git a/src/include/openssl/base.h b/src/include/openssl/base.h
-index 7a3adfb..88012ad 100644
---- a/src/include/openssl/base.h
-+++ b/src/include/openssl/base.h
-@@ -94,6 +94,8 @@ extern "C" {
- #define OPENSSL_PNACL
- #elif defined(__myriad2__)
- #define OPENSSL_32_BIT
-+#elif defined(__s390x__)
-+#define OPENSSL_64_BIT
- #else
- #error "Unknown target CPU"
- #endif
-diff --git a/BUILD b/BUILD
-index 6b645e61..c90b7beb 100644
---- a/BUILD
-+++ b/BUILD
-@@ -40,29 +40,46 @@ config_setting(
- values = {"cpu": "darwin"},
- )
-
--boringssl_copts = [
-- # Assembler option --noexecstack adds .note.GNU-stack to each object to
-- # ensure that binaries can be built with non-executable stack.
-- "-Wa,--noexecstack",
--
-- # This is needed on Linux systems (at least) to get rwlock in pthread.
-- "-D_XOPEN_SOURCE=700",
--
-- # This list of warnings should match those in the top-level CMakeLists.txt.
-- "-Wall",
-- "-Werror",
-- "-Wformat=2",
-- "-Wsign-compare",
-- "-Wmissing-field-initializers",
-- "-Wwrite-strings",
-- "-Wshadow",
-- "-fno-common",
--
-- # Modern build environments should be able to set this to use atomic
-- # operations for reference counting rather than locks. However, it's
-- # known not to work on some Android builds.
-- # "-DOPENSSL_C11_ATOMIC",
--] + select({
-+config_setting(
-+ name = "windows",
-+ values = {"cpu": "x64_windows"},
-+ visibility = ["//visibility:public"],
-+)
-+
-+config_setting(
-+ name = "windows_msvc",
-+ values = {"cpu": "x64_windows_msvc"},
-+ visibility = ["//visibility:public"],
-+)
-+
-+boringssl_copts = select({
-+ ":windows": [
-+ "-DWIN32_LEAN_AND_MEAN",
-+ ],
-+ "//conditions:default": [
-+ # Assembler option --noexecstack adds .note.GNU-stack to each object to
-+ # ensure that binaries can be built with non-executable stack.
-+ "-Wa,--noexecstack",
-+
-+ # This is needed on Linux systems (at least) to get rwlock in pthread.
-+ "-D_XOPEN_SOURCE=700",
-+
-+ # This list of warnings should match those in the top-level CMakeLists.txt.
-+ "-Wall",
-+ "-Werror",
-+ "-Wformat=2",
-+ "-Wsign-compare",
-+ "-Wmissing-field-initializers",
-+ "-Wwrite-strings",
-+ "-Wshadow",
-+ "-fno-common",
-+
-+ # Modern build environments should be able to set this to use atomic
-+ # operations for reference counting rather than locks. However, it's
-+ # known not to work on some Android builds.
-+ # "-DOPENSSL_C11_ATOMIC",
-+ ],
-+}) + select({
- ":linux_x86_64": [],
- ":mac_x86_64": [],
- "//conditions:default": ["-DOPENSSL_NO_ASM"],
-@@ -75,18 +92,26 @@ crypto_sources_asm = select({
- })
-
- # For C targets only (not C++), compile with C11 support.
--boringssl_copts_c11 = boringssl_copts + [
-- "-std=c11",
-- "-Wmissing-prototypes",
-- "-Wold-style-definition",
-- "-Wstrict-prototypes",
--]
-+boringssl_copts_c11 = boringssl_copts + select({
-+ ":windows": [],
-+ ":windows_msvc": [],
-+ "//conditions:default": [
-+ "-std=c11",
-+ "-Wmissing-prototypes",
-+ "-Wold-style-definition",
-+ "-Wstrict-prototypes",
-+ ],
-+})
-
- # For C++ targets only (not C), compile with C++11 support.
--boringssl_copts_cxx = boringssl_copts + [
-- "-std=c++11",
-- "-Wmissing-declarations",
--]
-+boringssl_copts_cxx = boringssl_copts + select({
-+ ":windows": [],
-+ ":windows_msvc": [],
-+ "//conditions:default": [
-+ "-std=c++11",
-+ "-Wmissing-declarations",
-+ ],
-+})
-
- cc_library(
- name = "crypto",
-@@ -96,6 +121,8 @@ cc_library(
- includes = ["src/include"],
- linkopts = select({
- ":mac_x86_64": [],
-+ ":windows": [],
-+ ":windows_msvc": [],
- "//conditions:default": ["-lpthread"],
- }),
- visibility = ["//visibility:public"],
diff --git a/third_party/git/git_configure.bzl b/third_party/git/git_configure.bzl
index bd197bf..47e2125 100644
--- a/third_party/git/git_configure.bzl
+++ b/third_party/git/git_configure.bzl
@@ -1,4 +1,31 @@
-"""Repository rule for Git autoconfiguration."""
+"""Repository rule for Git autoconfiguration.
+
+`git_configure` depends on the following environment variables:
+
+ * `PYTHON_BIN_PATH`: location of python binary.
+"""
+
+_PYTHON_BIN_PATH = "PYTHON_BIN_PATH"
+
+def _fail(msg):
+ """Output failure message when auto configuration fails."""
+ red = "\033[0;31m"
+ no_color = "\033[0m"
+ fail("%sGit Configuration Error:%s %s\n" % (red, no_color, msg))
+
+def _get_python_bin(repository_ctx):
+ """Gets the python bin path."""
+ python_bin = repository_ctx.os.environ.get(_PYTHON_BIN_PATH)
+ if python_bin != None:
+ return python_bin
+ python_bin_path = repository_ctx.which("python")
+ if python_bin_path != None:
+ return str(python_bin_path)
+ _fail("Cannot find python in PATH, please make sure " +
+ "python is installed and add its directory in PATH, or --define " +
+ "%s='/something/else'.\nPATH=%s" % (
+ _PYTHON_BIN_PATH, repository_ctx.os.environ.get("PATH", "")))
+
def _git_conf_impl(repository_ctx):
repository_ctx.template(
@@ -11,10 +38,18 @@
Label("@org_tensorflow//tensorflow/tools/git:gen_git_source.py"))
generated_files_path = repository_ctx.path("gen")
- repository_ctx.execute([
+ result = repository_ctx.execute([
+ _get_python_bin(repository_ctx),
python_script_path, "--configure", tensorflow_root_path,
"--gen_root_path", generated_files_path], quiet=False)
+ if result.return_code != 0:
+ _fail(result.stderr)
+
+
git_configure = repository_rule(
implementation = _git_conf_impl,
+ environ = [
+ _PYTHON_BIN_PATH,
+ ],
)
diff --git a/third_party/nanopb.BUILD b/third_party/nanopb.BUILD
deleted file mode 100644
index d218669..0000000
--- a/third_party/nanopb.BUILD
+++ /dev/null
@@ -1,23 +0,0 @@
-# Description:
-# Nanopb, a tiny ANSI C protobuf implementation for use on embedded devices.
-
-licenses(["notice"]) # zlib license
-
-exports_files(["LICENSE.txt"])
-
-cc_library(
- name = "nanopb",
- srcs = [
- "pb_common.c",
- "pb_decode.c",
- "pb_encode.c",
- ],
- hdrs = [
- "pb.h",
- "pb_common.h",
- "pb_decode.h",
- "pb_encode.h",
- ],
- includes = ["."],
- visibility = ["//visibility:public"],
-)
diff --git a/third_party/repo.bzl b/third_party/repo.bzl
index c29fef9..11e9c84 100644
--- a/third_party/repo.bzl
+++ b/third_party/repo.bzl
@@ -22,6 +22,14 @@
def _is_windows(ctx):
return ctx.os.name.lower().find("windows") != -1
+def _wrap_bash_cmd(ctx, cmd):
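+ # On Windows, tools such as `patch` and `rm` are only available under
+ # bash, so route the command through the shell given by BAZEL_SH.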
+ if _is_windows(ctx):
+ bazel_sh = _get_env_var(ctx, "BAZEL_SH")
+ if not bazel_sh:
+ fail("BAZEL_SH environment variable is not set")
+ cmd = [bazel_sh, "-c", " ".join(cmd)]
+ return cmd
+
def _get_env_var(ctx, name):
if name in ctx.os.environ:
return ctx.os.environ[name]
@@ -46,12 +54,8 @@
# Don't check patch on Windows, because patch is only available under bash.
if not _is_windows(ctx) and not ctx.which("patch"):
fail("patch command is not found, please install it")
- cmd = ["patch", "-p1", "-d", ctx.path("."), "-i", ctx.path(patch_file)]
- if _is_windows(ctx):
- bazel_sh = _get_env_var(ctx, "BAZEL_SH")
- if not bazel_sh:
- fail("BAZEL_SH environment variable is not set")
- cmd = [bazel_sh, "-c", " ".join(cmd)]
+ cmd = _wrap_bash_cmd(
+ ctx, ["patch", "-p1", "-d", ctx.path("."), "-i", ctx.path(patch_file)])
_execute_and_check_ret_code(ctx, cmd)
def _apply_delete(ctx, paths):
@@ -60,8 +64,8 @@
fail("refusing to rm -rf path starting with '/': " + path)
if ".." in path:
fail("refusing to rm -rf path containing '..': " + path)
- _execute_and_check_ret_code(
- ctx, ["rm", "-rf"] + [ctx.path(path) for path in paths])
+ cmd = _wrap_bash_cmd(ctx, ["rm", "-rf"] + [ctx.path(path) for path in paths])
+ _execute_and_check_ret_code(ctx, cmd)
def _tf_http_archive(ctx):
if ("mirror.bazel.build" not in ctx.attr.urls[0] or