Merge changes from github.
Change: 137532946
diff --git a/README.md b/README.md
index 49b1983..1372f20 100644
--- a/README.md
+++ b/README.md
@@ -33,10 +33,10 @@
People who are a little more adventurous can also try our nightly binaries:
-* Linux CPU-only: [Python 2](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=cpu-slave/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.11.0rc0-cp27-none-linux_x86_64.whl) ([build history](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=cpu-slave)) / [Python 3.4](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=cpu-slave/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.11.0rc0-cp34-cp34m-linux_x86_64.whl) ([build history](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=cpu-slave/)) / [Python 3.5](https://ci.tensorflow.org/view/Nightly/job/nightly-python35-linux-cpu/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.11.0rc0-cp35-cp35m-linux_x86_64.whl) ([build history](https://ci.tensorflow.org/view/Nightly/job/nightly-python35-linux-cpu/))
-* Linux GPU: [Python 2](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-linux-gpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=gpu-linux/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.11.0rc0-cp27-none-linux_x86_64.whl) ([build history](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-linux-gpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=gpu-linux/)) / [Python 3.4](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-linux-gpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=gpu-linux/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.11.0rc0-cp34-cp34m-linux_x86_64.whl) ([build history](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-linux-gpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=gpu-linux/)) / [Python 3.5](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-linux-gpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3.5,label=gpu-linux/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.11.0rc0-cp35-cp35m-linux_x86_64.whl) ([build history](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-linux-gpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3.5,label=gpu-linux/))
-* Mac CPU-only: [Python 2](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=mac-slave/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.11.0rc0-py2-none-any.whl) ([build history](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=mac-slave/)) / [Python 3](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=mac-slave/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.11.0rc0-py3-none-any.whl) ([build history](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=mac-slave/))
-* Mac GPU: [Python 2](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-mac-gpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=gpu-mac/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.11.0rc0-py2-none-any.whl) ([build history](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-mac-gpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=gpu-mac/)) / [Python 3](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-mac-gpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=gpu-mac/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.11.0rc0-py3-none-any.whl) ([build history](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-mac-gpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=gpu-mac/))
+* Linux CPU-only: [Python 2](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=cpu-slave/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.11.0rc1-cp27-none-linux_x86_64.whl) ([build history](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=cpu-slave)) / [Python 3.4](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=cpu-slave/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.11.0rc1-cp34-cp34m-linux_x86_64.whl) ([build history](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=cpu-slave/)) / [Python 3.5](https://ci.tensorflow.org/view/Nightly/job/nightly-python35-linux-cpu/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.11.0rc1-cp35-cp35m-linux_x86_64.whl) ([build history](https://ci.tensorflow.org/view/Nightly/job/nightly-python35-linux-cpu/))
+* Linux GPU: [Python 2](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-linux-gpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=gpu-linux/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.11.0rc1-cp27-none-linux_x86_64.whl) ([build history](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-linux-gpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=gpu-linux/)) / [Python 3.4](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-linux-gpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=gpu-linux/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.11.0rc1-cp34-cp34m-linux_x86_64.whl) ([build history](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-linux-gpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=gpu-linux/)) / [Python 3.5](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-linux-gpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3.5,label=gpu-linux/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.11.0rc1-cp35-cp35m-linux_x86_64.whl) ([build history](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-linux-gpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3.5,label=gpu-linux/))
+* Mac CPU-only: [Python 2](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=mac1-slave/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.11.0rc1-py2-none-any.whl) ([build history](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=mac1-slave/)) / [Python 3](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=mac1-slave/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.11.0rc1-py3-none-any.whl) ([build history](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=mac1-slave/))
+* Mac GPU: [Python 2](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-mac-gpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=gpu-mac/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.11.0rc1-py2-none-any.whl) ([build history](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-mac-gpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=gpu-mac/)) / [Python 3](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-mac-gpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=gpu-mac/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.11.0rc1-py3-none-any.whl) ([build history](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-mac-gpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=gpu-mac/))
* [Android](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-android/TF_BUILD_CONTAINER_TYPE=ANDROID,TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=NO_PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=android-slave/lastSuccessfulBuild/artifact/bazel-out/local_linux/bin/tensorflow/examples/android/tensorflow_demo.apk) ([build history](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-android/TF_BUILD_CONTAINER_TYPE=ANDROID,TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=NO_PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=android-slave/))
#### *Try your first TensorFlow program*
diff --git a/tensorflow/contrib/cmake/CMakeLists.txt b/tensorflow/contrib/cmake/CMakeLists.txt
index 7d94f8a..a7dd248 100644
--- a/tensorflow/contrib/cmake/CMakeLists.txt
+++ b/tensorflow/contrib/cmake/CMakeLists.txt
@@ -15,6 +15,7 @@
# Options
option(tensorflow_VERBOSE "Enable for verbose output" OFF)
+option(tensorflow_ENABLE_GPU "Enable GPU support" OFF)
option(tensorflow_ENABLE_SSL_SUPPORT "Enable boringssl support" OFF)
option(tensorflow_ENABLE_GRPC_SUPPORT "Enable gRPC support" ON)
option(tensorflow_BUILD_CC_EXAMPLE "Build the C++ tutorial example" ON)
@@ -48,8 +49,13 @@
add_definitions(-DEIGEN_AVOID_STL_ARRAY)
if(WIN32)
add_definitions(-DNOMINMAX -D_WIN32_WINNT=0x0A00 -DLANG_CXX11 -DCOMPILER_MSVC -D__VERSION__=\"MSVC\")
+ add_definitions(-DWIN32 -DOS_WIN -D_MBCS -DWIN64 -DWIN32_LEAN_AND_MEAN -DNOGDI -DPLATFORM_WINDOWS)
+ add_definitions(-DTENSORFLOW_USE_EIGEN_THREADPOOL -DEIGEN_HAS_C99_MATH -D_ITERATOR_DEBUG_LEVEL=0)
+ add_definitions(/bigobj /nologo /EHsc /GF /FC /MP /Gm-)
# Suppress warnings to reduce build log size.
add_definitions(/wd4267 /wd4244 /wd4800 /wd4503 /wd4554 /wd4996 /wd4348 /wd4018)
+ add_definitions(/wd4099 /wd4146 /wd4267 /wd4305 /wd4307)
+ add_definitions(/wd4715 /wd4722 /wd4723 /wd4838 /wd4309 /wd4334)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /MP")
endif()
@@ -80,7 +86,16 @@
${protobuf_STATIC_LIBRARIES}
)
set(tensorflow_EXTERNAL_DEPENDENCIES
- gif_copy_headers_to_destination png_copy_headers_to_destination jpeg_copy_headers_to_destination jsoncpp farmhash_copy_headers_to_destination highwayhash_copy_headers_to_destination protobuf eigen)
+ zlib_copy_headers_to_destination
+ gif_copy_headers_to_destination
+ png_copy_headers_to_destination
+ jpeg_copy_headers_to_destination
+ jsoncpp
+ farmhash_copy_headers_to_destination
+ highwayhash_copy_headers_to_destination
+ protobuf
+ eigen
+)
include_directories(
# Source and generated code.
@@ -118,19 +133,67 @@
list(APPEND tensorflow_EXTERNAL_LIBRARIES ${CMAKE_THREAD_LIBS_INIT} ${CMAKE_DL_LIBS})
endif()
+if (tensorflow_ENABLE_GPU)
+ if (WIN32)
+ find_package(CUDA 8.0 REQUIRED)
+
+ # by default we assume compute cabability 3.5 and 5.2. If you change this change it in
+ # CUDA_NVCC_FLAGS and cuda_config.h below
+ set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-gencode arch=compute_35,code=\"sm_35,compute_35\";-gencode arch=compute_52,code=\"sm_52,compute_52\")
+ set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};--include-path ${PROJECT_BINARY_DIR}/$\{build_configuration\};--expt-relaxed-constexpr)
+ set(CUDA_INCLUDE ${CUDA_TOOLKIT_TARGET_DIR} ${CUDA_TOOLKIT_TARGET_DIR}/extras/CUPTI/include)
+ include_directories(${CUDA_INCLUDE})
+ add_definitions(-DGOOGLE_CUDA=1 -DTF_EXTRA_CUDA_CAPABILITIES=3.5,5.2)
+
+ # add cudnn
+ include_directories(${CUDNN_HOME})
+ set(CUDA_LIBRARIES ${CUDA_LIBRARIES} ${CUDNN_HOME}/lib/x64/cudnn.lib)
+
+ # create cuda_config.h
+ FILE(WRITE ${tensorflow_source_dir}/third_party/gpus/cuda/cuda_config.h
+ "#ifndef CUDA_CUDA_CONFIG_H_\n"
+ "#define CUDA_CUDA_CONFIG_H_\n"
+ "#define TF_CUDA_CAPABILITIES CudaVersion(\"3.5\"),CudaVersion(\"5.2\")\n"
+ "#define TF_CUDA_VERSION \"64_80\"\n"
+ "#define TF_CUDNN_VERSION \"64_5\"\n"
+ "#endif // CUDA_CUDA_CONFIG_H_\n"
+ )
+
+ # tf assumes in various places header files to be in cuda/include. On windows the cuda sdk
+ # installs them under cuda/version/include and to avoid that we need to change tf we copy a
+ # few files to cuda/include
+ FILE(COPY
+ ${CUDA_TOOLKIT_TARGET_DIR}/include/cuda.h ${CUDA_TOOLKIT_TARGET_DIR}/include/cuComplex.h
+ ${CUDA_TOOLKIT_TARGET_DIR}/include/cublas_v2.h ${CUDNN_HOME}/include/cudnn.h
+ ${CUDA_TOOLKIT_TARGET_DIR}/include/cufft.h ${CUDA_TOOLKIT_TARGET_DIR}/include/curand.h
+ DESTINATION ${tensorflow_source_dir}/third_party/gpus/cuda/include
+ )
+ include_directories(${tensorflow_source_dir}/third_party/gpus)
+ # add cuda libraries to tensorflow_EXTERNAL_LIBRARIES
+ list(APPEND tensorflow_EXTERNAL_LIBRARIES ${CUDA_LIBRARIES})
+ endif()
+endif()
+
# Let's get to work!
include(tf_core_framework.cmake)
include(tf_tools.cmake)
# NOTE: Disabled until issue #3996 is fixed.
# include(tf_stream_executor.cmake)
+if (tensorflow_ENABLE_GPU)
+ if (WIN32)
+ include(tf_stream_executor.cmake)
+ endif()
+endif()
+
include(tf_core_cpu.cmake)
include(tf_models.cmake)
include(tf_core_ops.cmake)
include(tf_core_direct_session.cmake)
+include(tf_core_kernels.cmake)
if(tensorflow_ENABLE_GRPC_SUPPORT)
include(tf_core_distributed_runtime.cmake)
endif()
-include(tf_core_kernels.cmake)
+
include(tf_cc_ops.cmake)
if(tensorflow_BUILD_CC_EXAMPLE)
include(tf_tutorials.cmake)
diff --git a/tensorflow/contrib/cmake/README.md b/tensorflow/contrib/cmake/README.md
index daf5101..a3510b5 100644
--- a/tensorflow/contrib/cmake/README.md
+++ b/tensorflow/contrib/cmake/README.md
@@ -15,14 +15,13 @@
The CMake files in this directory can build the core TensorFlow runtime, an
example C++ binary, and a PIP package containing the runtime and Python
-bindings. Currently, only CPU builds are supported, but we are working on
-providing a GPU build as well.
+bindings.
Note: Windows support is in an **alpha** state, and we welcome your feedback.
### Pre-requisites
-* CMake version 3.1 or later
+* CMake version 3.1 up to 3.6
* [Git](http://git-scm.com)
@@ -45,21 +44,13 @@
- [Anaconda 4.1.1 (Python 3.5 64-bit)](https://www.continuum.io/downloads)
- [Git for Windows version 2.9.2.windows.1](https://git-scm.com/download/win)
- [swigwin-3.0.10](http://www.swig.org/download.html)
-
+ - [NVidia CUDA Toolkit 8.0] (https://developer.nvidia.com/cuda-downloads)
+ - [NVidia CUDNN 5.1] (https://developer.nvidia.com/cudnn)
* Ubuntu 14.04
- Makefile generator
- Docker 1.9.1 (for automated testing)
### Current known limitations
-
-* CPU support only
-
- - We are in the process of porting the GPU code in
- `tensorflow/stream_executor` to build with CMake and work on non-POSIX
- platforms.
-
-* Additional limitations for the Windows build:
-
- The Python package supports **Python 3.5 only**, because that is the only
version for which standard Python binaries exist and those binaries are
compatible with the TensorFlow runtime. (On Windows, the standard Python
@@ -114,6 +105,17 @@
D:\temp> "C:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\bin\amd64\vcvarsall.bat"
```
+ * When building with GPU support after installing the CUDNN zip file from NVidia, append its
+ bin directory to your PATH environment variable.
+ In case TensorFlow fails to find the CUDA dll's during initialization, check your PATH environment variable.
+ It should contain the directory of the CUDA dlls and the directory of the CUDNN dll.
+ For example:
+
+ ```
+ D:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v8.0\bin
+ D:\local\cuda\bin
+ ```
+
* We assume that `cmake` and `git` are installed and in your `%PATH%`. If
for example `cmake` is not in your path and it is installed in
`C:\Program Files (x86)\CMake\bin\cmake.exe`, you can add this directory
@@ -145,9 +147,14 @@
D:\...\build> cmake .. -A x64 -DCMAKE_BUILD_TYPE=Release ^
More? -DSWIG_EXECUTABLE=C:/tools/swigwin-3.0.10/swig.exe ^
More? -DPYTHON_EXECUTABLE=C:/Users/%USERNAME%/AppData/Local/Continuum/Anaconda3/python.exe ^
- More? -DPYTHON_LIBRARIES=C:/Users/%USERNAME%/AppData/Local/Continuum/Anaconda3/libs/python35.lib
+ More? -DPYTHON_LIBRARIES=C:/Users/%USERNAME%/AppData/Local/Continuum/Anaconda3/libs/python35.lib
```
-
+ To build with GPU support add "^" at the end of the last line above following with:
+ ```
+ More? -Dtensorflow_ENABLE_GPU=ON ^
+ More? -DCUDNN_HOME="D:\...\cudnn"
+ ```
+
Note that the `-DCMAKE_BUILD_TYPE=Release` flag must match the build
configuration that you choose when invoking `msbuild`. The known-good
values are `Release` and `RelWithDebInfo`. The `Debug` build type is
@@ -184,6 +191,11 @@
SSL support (for making secure HTTP requests) in the TensorFlow runtime.
This support is incomplete, and will be used for Google Cloud Storage
support.
+
+ * `-Dtensorflow_ENABLE_GPU=(ON|OFF)`. Defaults to `OFF`. Include
+ GPU support. If GPU is enabled you need to install the CUDA 8.0 Toolkit and CUDNN 5.1.
+ CMake will expect the location of CUDNN in -DCUDNN_HOME=path_you_unziped_cudnn.
+
4. Invoke MSBuild to build TensorFlow.
@@ -202,7 +214,6 @@
D:\...\build> MSBuild /p:Configuration=Release tf_python_build_pip_package.vcxproj
```
-
Linux Continuous Integration build
==================================
diff --git a/tensorflow/contrib/cmake/setup.py b/tensorflow/contrib/cmake/setup.py
index bd1dade..1edc173 100644
--- a/tensorflow/contrib/cmake/setup.py
+++ b/tensorflow/contrib/cmake/setup.py
@@ -26,7 +26,7 @@
from setuptools.command.install import install as InstallCommandBase
from setuptools.dist import Distribution
-_VERSION = '0.11.0rc0-cmake-experimental'
+_VERSION = '0.11.0rc1-cmake-experimental'
REQUIRED_PACKAGES = [
'numpy >= 1.11.0',
diff --git a/tensorflow/contrib/cmake/tf_core_cpu.cmake b/tensorflow/contrib/cmake/tf_core_cpu.cmake
index 143f2e7..f850c40 100644
--- a/tensorflow/contrib/cmake/tf_core_cpu.cmake
+++ b/tensorflow/contrib/cmake/tf_core_cpu.cmake
@@ -21,13 +21,27 @@
"${tensorflow_source_dir}/tensorflow/core/common_runtime/session_factory.cc"
"${tensorflow_source_dir}/tensorflow/core/common_runtime/session_options.cc"
)
-
list(REMOVE_ITEM tf_core_cpu_srcs ${tf_core_cpu_exclude_srcs})
+
# We need to include stubs for the GPU tracer, which are in the exclude glob.
list(APPEND tf_core_cpu_srcs
"${tensorflow_source_dir}/tensorflow/core/common_runtime/gpu/gpu_tracer.cc"
"${tensorflow_source_dir}/tensorflow/core/common_runtime/gpu/gpu_tracer.h"
)
+if (tensorflow_ENABLE_GPU)
+ file(GLOB_RECURSE tf_core_gpu_srcs
+ "${tensorflow_source_dir}/tensorflow/core/common_runtime/gpu/*.cc"
+ "${tensorflow_source_dir}/tensorflow/core/platform/default/gpu/cupti_wrapper.cc"
+ "${tensorflow_source_dir}/tensorflow/core/common_runtime/gpu_device_factory.cc"
+ )
+ file(GLOB_RECURSE tf_core_gpu_exclude_srcs
+ "${tensorflow_source_dir}/tensorflow/core/*test*.cc"
+ "${tensorflow_source_dir}/tensorflow/core/*test*.cc"
+ )
+ list(REMOVE_ITEM tf_core_gpu_srcs ${tf_core_gpu_exclude_srcs})
+ list(APPEND tf_core_cpu_srcs ${tf_core_gpu_srcs})
+endif()
+
add_library(tf_core_cpu OBJECT ${tf_core_cpu_srcs})
add_dependencies(tf_core_cpu tf_core_framework)
diff --git a/tensorflow/contrib/cmake/tf_core_distributed_runtime.cmake b/tensorflow/contrib/cmake/tf_core_distributed_runtime.cmake
index cf41e92..b3c06d2 100644
--- a/tensorflow/contrib/cmake/tf_core_distributed_runtime.cmake
+++ b/tensorflow/contrib/cmake/tf_core_distributed_runtime.cmake
@@ -38,9 +38,11 @@
$<TARGET_OBJECTS:tf_core_ops>
$<TARGET_OBJECTS:tf_core_direct_session>
$<TARGET_OBJECTS:tf_core_distributed_runtime>
+ $<$<BOOL:${tensorflow_ENABLE_GPU}>:$<TARGET_OBJECTS:tf_stream_executor>>
)
target_link_libraries(grpc_tensorflow_server PUBLIC
tf_protos_cc
+ ${tf_core_gpu_kernels_lib}
${tensorflow_EXTERNAL_LIBRARIES}
)
diff --git a/tensorflow/contrib/cmake/tf_core_kernels.cmake b/tensorflow/contrib/cmake/tf_core_kernels.cmake
index 6927ecf..19b57f0 100644
--- a/tensorflow/contrib/cmake/tf_core_kernels.cmake
+++ b/tensorflow/contrib/cmake/tf_core_kernels.cmake
@@ -38,6 +38,7 @@
"${tensorflow_source_dir}/tensorflow/contrib/layers/ops/sparse_feature_cross_op.cc"
"${tensorflow_source_dir}/tensorflow/contrib/metrics/kernels/set_kernels.cc"
"${tensorflow_source_dir}/tensorflow/contrib/metrics/ops/set_ops.cc"
+ "${tensorflow_source_dir}/tensorflow/contrib/rnn/kernels/blas_gemm.cc"
"${tensorflow_source_dir}/tensorflow/contrib/rnn/kernels/gru_ops.cc"
"${tensorflow_source_dir}/tensorflow/contrib/rnn/kernels/lstm_ops.cc"
"${tensorflow_source_dir}/tensorflow/contrib/rnn/ops/gru_ops.cc"
@@ -83,7 +84,7 @@
if(WIN32)
file(GLOB_RECURSE tf_core_kernels_windows_exclude_srcs
- # Not currently working on Windows:
+ # not working on windows yet
"${tensorflow_source_dir}/tensorflow/core/kernels/depthwise_conv_op.cc" # Cannot find symbol: tensorflow::LaunchConv2DOp<struct Eigen::ThreadPoolDevice, double>::launch(...).
"${tensorflow_source_dir}/tensorflow/core/kernels/fact_op.cc"
"${tensorflow_source_dir}/tensorflow/core/kernels/immutable_constant_op.cc"
@@ -93,14 +94,38 @@
"${tensorflow_source_dir}/tensorflow/core/kernels/sparse_matmul_op.h"
"${tensorflow_source_dir}/tensorflow/core/kernels/*quantiz*.h"
"${tensorflow_source_dir}/tensorflow/core/kernels/*quantiz*.cc"
+ "${tensorflow_source_dir}/tensorflow/core/kernels/svd*.cc"
+ "${tensorflow_source_dir}/tensorflow/core/kernels/avgpooling_op.*"
)
list(REMOVE_ITEM tf_core_kernels_srcs ${tf_core_kernels_windows_exclude_srcs})
endif(WIN32)
+file(GLOB_RECURSE tf_core_gpu_kernels_srcs
+ "${tensorflow_source_dir}/tensorflow/core/kernels/*.cu.cc"
+ "${tensorflow_source_dir}/tensorflow/contrib/rnn/kernels/*.cu.cc"
+)
+
+if(WIN32)
+ file(GLOB_RECURSE tf_core_gpu_kernels_exclude_srcs
+ # not working on windows yet
+ "${tensorflow_source_dir}/tensorflow/core/kernels/avgpooling_op_gpu.cu.cc"
+ )
+ list(REMOVE_ITEM tf_core_gpu_kernels_srcs ${tf_core_gpu_kernels_exclude_srcs})
+endif(WIN32)
+
add_library(tf_core_kernels OBJECT ${tf_core_kernels_srcs})
+add_dependencies(tf_core_kernels tf_core_cpu)
if(WIN32)
target_compile_options(tf_core_kernels PRIVATE /MP)
+ if (tensorflow_ENABLE_GPU)
+ set_source_files_properties(${tf_core_gpu_kernels_srcs} PROPERTIES CUDA_SOURCE_PROPERTY_FORMAT OBJ)
+ set(tf_core_gpu_kernels_lib tf_core_gpu_kernels)
+ cuda_add_library(${tf_core_gpu_kernels_lib} ${tf_core_gpu_kernels_srcs})
+ set_target_properties(${tf_core_gpu_kernels_lib}
+ PROPERTIES DEBUG_POSTFIX ""
+ COMPILE_FLAGS "${TF_REGULAR_CXX_FLAGS}"
+ )
+ add_dependencies(${tf_core_gpu_kernels_lib} tf_core_cpu)
+ endif()
endif()
-
-add_dependencies(tf_core_kernels tf_core_cpu)
diff --git a/tensorflow/contrib/cmake/tf_python.cmake b/tensorflow/contrib/cmake/tf_python.cmake
index d1029d3..8cdecf7 100644
--- a/tensorflow/contrib/cmake/tf_python.cmake
+++ b/tensorflow/contrib/cmake/tf_python.cmake
@@ -302,12 +302,14 @@
$<TARGET_OBJECTS:tf_core_direct_session>
$<$<BOOL:${tensorflow_ENABLE_GRPC_SUPPORT}>:$<TARGET_OBJECTS:tf_core_distributed_runtime>>
$<TARGET_OBJECTS:tf_core_kernels>
+ $<$<BOOL:${tensorflow_ENABLE_GPU}>:$<TARGET_OBJECTS:tf_stream_executor>>
)
target_include_directories(pywrap_tensorflow PUBLIC
${PYTHON_INCLUDE_DIR}
${NUMPY_INCLUDE_DIR}
)
target_link_libraries(pywrap_tensorflow
+ ${tf_core_gpu_kernels_lib}
${tensorflow_EXTERNAL_LIBRARIES}
tf_protos_cc
${PYTHON_LIBRARIES}
diff --git a/tensorflow/contrib/cmake/tf_stream_executor.cmake b/tensorflow/contrib/cmake/tf_stream_executor.cmake
index b121ddf..bf45bb0 100644
--- a/tensorflow/contrib/cmake/tf_stream_executor.cmake
+++ b/tensorflow/contrib/cmake/tf_stream_executor.cmake
@@ -47,11 +47,17 @@
"${tensorflow_source_dir}/tensorflow/stream_executor/platform/default/*.h"
)
+if (tensorflow_ENABLE_GPU)
+ file(GLOB tf_stream_executor_gpu_srcs
+ "${tensorflow_source_dir}/tensorflow/stream_executor/cuda/*.cc"
+ )
+ list(APPEND tf_stream_executor_srcs ${tf_stream_executor_gpu_srcs})
+endif()
+
#file(GLOB_RECURSE tf_stream_executor_test_srcs
# "${tensorflow_source_dir}/tensorflow/stream_executor/*_test.cc"
# "${tensorflow_source_dir}/tensorflow/stream_executor/*_test.h"
#)
-#
#list(REMOVE_ITEM tf_stream_executor_srcs ${tf_stream_executor_test_srcs})
add_library(tf_stream_executor OBJECT ${tf_stream_executor_srcs})
diff --git a/tensorflow/contrib/cmake/tf_tutorials.cmake b/tensorflow/contrib/cmake/tf_tutorials.cmake
index 8a23d02..d6547d6 100644
--- a/tensorflow/contrib/cmake/tf_tutorials.cmake
+++ b/tensorflow/contrib/cmake/tf_tutorials.cmake
@@ -12,9 +12,11 @@
$<TARGET_OBJECTS:tf_cc_ops>
$<TARGET_OBJECTS:tf_core_ops>
$<TARGET_OBJECTS:tf_core_direct_session>
+ $<$<BOOL:${tensorflow_ENABLE_GPU}>:$<TARGET_OBJECTS:tf_stream_executor>>
)
target_link_libraries(tf_tutorials_example_trainer PUBLIC
tf_protos_cc
+ ${tf_core_gpu_kernels_lib}
${tensorflow_EXTERNAL_LIBRARIES}
)
diff --git a/tensorflow/contrib/layers/python/layers/layers.py b/tensorflow/contrib/layers/python/layers/layers.py
index f07e8a3..7f19d42 100644
--- a/tensorflow/contrib/layers/python/layers/layers.py
+++ b/tensorflow/contrib/layers/python/layers/layers.py
@@ -942,6 +942,7 @@
kernel_size,
stride=1,
padding='SAME',
+ data_format=DATA_FORMAT_NHWC,
activation_fn=nn.relu,
normalizer_fn=None,
normalizer_params=None,
@@ -961,7 +962,9 @@
second variable called 'biases' is added to the result of the operation.
Args:
- inputs: a tensor of size [batch_size, height, width, channels].
+ inputs: A 4-D `Tensor` of type `float` and shape
+ `[batch, height, width, in_channels]` for `NHWC` data format or
+ `[batch, in_channels, height, width]` for `NCHW` data format.
num_outputs: integer, the number of output filters.
kernel_size: a list of length 2 holding the [kernel_height, kernel_width] of
of the filters. Can be an int if both values are the same.
@@ -969,6 +972,7 @@
Can be an int if both strides are the same. Note that presently
both strides must have the same value.
padding: one of 'VALID' or 'SAME'.
+ data_format: A string. `NHWC` (default) and `NCHW` are supported.
activation_fn: activation function, set to None to skip it and maintain
a linear activation.
normalizer_fn: normalization function to use instead of `biases`. If
@@ -993,14 +997,23 @@
Raises:
ValueError: if 'kernel_size' is not a list of length 2.
+ ValueError: if `data_format` is neither `NHWC` nor `NCHW`.
+ ValueError: if `C` dimension of `inputs` is None.
"""
with variable_scope.variable_scope(
scope, 'Conv2d_transpose', [inputs], reuse=reuse) as sc:
+ if data_format not in (DATA_FORMAT_NCHW, DATA_FORMAT_NHWC):
+ raise ValueError('data_format has to be either NCHW or NHWC.')
dtype = inputs.dtype.base_dtype
kernel_h, kernel_w = utils.two_element_tuple(kernel_size)
stride_h, stride_w = utils.two_element_tuple(stride)
- num_filters_in = utils.last_dimension(
- inputs.get_shape(), min_rank=4)
+ if data_format == DATA_FORMAT_NCHW:
+ c_axis, h_axis, w_axis = 1, 2, 3
+ else:
+ h_axis, w_axis, c_axis = 1, 2, 3
+ num_filters_in = inputs.get_shape()[c_axis].value
+ if num_filters_in is None:
+ raise ValueError('`C` dimension of `inputs` must be known but is None.')
weights_shape = [kernel_h, kernel_w, num_outputs, num_filters_in]
weights_collections = utils.get_variable_collections(
variables_collections, 'weights')
@@ -1015,7 +1028,7 @@
inputs_shape = array_ops.shape(inputs)
batch_size = inputs_shape[0]
- height, width = inputs_shape[1], inputs_shape[2]
+ height, width = inputs_shape[h_axis], inputs_shape[w_axis]
def get_deconv_dim(dim_size, stride_size, kernel_size, padding):
if isinstance(dim_size, ops.Tensor):
@@ -1031,17 +1044,25 @@
out_height = get_deconv_dim(height, stride_h, kernel_h, padding)
out_width = get_deconv_dim(width, stride_w, kernel_w, padding)
- output_shape = array_ops.pack(
- [batch_size, out_height, out_width, num_outputs])
+ if data_format == DATA_FORMAT_NHWC:
+ output_shape = [batch_size, out_height, out_width, num_outputs]
+ strides = [1, stride_h, stride_w, 1]
+ else:
+ output_shape = [batch_size, num_outputs, out_height, out_width]
+ strides = [1, 1, stride_h, stride_w]
+
+
+ output_shape = array_ops.pack(output_shape)
outputs = nn.conv2d_transpose(inputs, weights, output_shape,
- [1, stride_h, stride_w, 1],
- padding=padding)
+ strides,
+ padding=padding,
+ data_format=data_format)
# Infer the static output shape:
out_shape = inputs.get_shape().as_list()
- out_shape[-1] = num_outputs
- out_shape[1] = get_deconv_dim(out_shape[1], stride_h, kernel_h, padding)
- out_shape[2] = get_deconv_dim(out_shape[2], stride_w, kernel_w, padding)
+ out_shape[c_axis] = num_outputs
+ out_shape[h_axis] = get_deconv_dim(out_shape[h_axis], stride_h, kernel_h, padding)
+ out_shape[w_axis] = get_deconv_dim(out_shape[w_axis], stride_w, kernel_w, padding)
outputs.set_shape(out_shape)
if normalizer_fn is not None:
@@ -1057,7 +1078,7 @@
initializer=biases_initializer,
regularizer=biases_regularizer,
collections=biases_collections)
- outputs = nn.bias_add(outputs, biases)
+ outputs = nn.bias_add(outputs, biases, data_format=data_format)
if activation_fn is not None:
outputs = activation_fn(outputs)
diff --git a/tensorflow/contrib/layers/python/layers/layers_test.py b/tensorflow/contrib/layers/python/layers/layers_test.py
index 0ee6eb1..ff0d0a2 100644
--- a/tensorflow/contrib/layers/python/layers/layers_test.py
+++ b/tensorflow/contrib/layers/python/layers/layers_test.py
@@ -588,6 +588,175 @@
class Convolution2dTransposeTests(tf.test.TestCase):
+ def testInvalidDataFormat(self):
+ height, width = 7, 9
+ with self.test_session():
+ images = tf.random_uniform((5, height, width, 3), seed=1)
+ with self.assertRaisesRegexp(
+ ValueError, 'data_format has to be either NCHW or NHWC.'):
+ tf.contrib.layers.convolution2d_transpose(
+ images, 32, 3, data_format='CHWN')
+
+
+ def testOutputSizeWithStrideOneSamePaddingNCHW(self):
+ # `NCHW` data fomat is only supported for `GPU` device.
+ if tf.test.is_gpu_available():
+ with self.test_session(use_gpu=True) as sess:
+ num_filters = 32
+ input_size = [5, 3, 10, 12]
+ expected_size = [5, num_filters, 10, 12]
+
+ images = tf.random_uniform(input_size, seed=1)
+ output = tf.contrib.layers.conv2d_transpose(
+ images, num_filters, [3, 3], stride=1,
+ padding='SAME', data_format='NCHW')
+ self.assertEqual(output.op.name, 'Conv2d_transpose/Relu')
+
+ sess.run(tf.initialize_all_variables())
+ self.assertListEqual(list(output.eval().shape), expected_size)
+
+
+ def testOutputSizeWithStrideOneValidPaddingNCHW(self):
+ if tf.test.is_gpu_available():
+ with self.test_session(use_gpu=True) as sess:
+ num_filters = 32
+ input_size = [5, 3, 10, 12]
+ expected_size = [5, num_filters, 12, 14]
+
+ images = tf.random_uniform(input_size, seed=1)
+ output = tf.contrib.layers.conv2d_transpose(
+ images, num_filters, [3, 3], stride=1,
+ padding='VALID', data_format='NCHW')
+ self.assertEqual(output.op.name, 'Conv2d_transpose/Relu')
+
+ sess.run(tf.initialize_all_variables())
+ self.assertListEqual(list(output.eval().shape), expected_size)
+
+ def testOutputSizeWithStrideTwoValidPaddingNCHW(self):
+ if tf.test.is_gpu_available():
+ with self.test_session(use_gpu=True) as sess:
+ num_filters = 32
+ input_size = [5, 3, 9, 11]
+ expected_size = [5, num_filters, 19, 23]
+
+ images = tf.random_uniform(input_size, seed=1)
+ output = tf.contrib.layers.conv2d_transpose(
+ images, num_filters, [3, 3], stride=[2, 2],
+ padding='VALID', data_format='NCHW')
+ self.assertEqual(output.op.name, 'Conv2d_transpose/Relu')
+ self.assertListEqual(list(output.get_shape().as_list()), expected_size)
+
+ sess.run(tf.initialize_all_variables())
+ self.assertListEqual(list(output.eval().shape), expected_size)
+
+ def testOutputSizeWith1x1StrideTwoSamePaddingNCHW(self):
+ if tf.test.is_gpu_available():
+ with self.test_session(use_gpu=True) as sess:
+ num_filters = 1
+ input_size = [1, 1, 1, 1]
+ expected_size = [1, num_filters, 2, 2]
+
+ images = tf.random_uniform(input_size, seed=1)
+ output = tf.contrib.layers.conv2d_transpose(
+ images, num_filters, [2, 2], stride=[2, 2],
+ padding='SAME', data_format='NCHW')
+ self.assertListEqual(list(output.get_shape().as_list()), expected_size)
+
+ sess.run(tf.initialize_all_variables())
+ self.assertEqual(output.op.name, 'Conv2d_transpose/Relu')
+ self.assertListEqual(list(output.eval().shape), expected_size)
+
+ def testOutputSizeWith1x1StrideTwoValidPaddingNCHW(self):
+ if tf.test.is_gpu_available():
+ with self.test_session(use_gpu=True) as sess:
+ num_filters = 1
+ input_size = [1, 1, 1, 1]
+ expected_size = [1, num_filters, 2, 2]
+
+ images = tf.random_uniform(input_size, seed=1)
+ output = tf.contrib.layers.conv2d_transpose(
+ images, num_filters, [2, 2], stride=[2, 2],
+ padding='VALID', data_format='NCHW')
+ sess.run(tf.initialize_all_variables())
+ self.assertEqual(output.op.name, 'Conv2d_transpose/Relu')
+ self.assertListEqual(list(output.eval().shape), expected_size)
+
+ def testOutputSizeWith2x2StrideTwoSamePaddingNCHW(self):
+ if tf.test.is_gpu_available():
+ with self.test_session(use_gpu=True) as sess:
+ num_filters = 1
+ input_size = [1, 1, 2, 2]
+ expected_size = [1, num_filters, 4, 4]
+
+ images = tf.random_uniform(input_size, seed=1)
+ output = tf.contrib.layers.conv2d_transpose(
+ images, num_filters, [2, 2], stride=[2, 2],
+ padding='SAME', data_format='NCHW')
+ sess.run(tf.initialize_all_variables())
+ self.assertEqual(output.op.name, 'Conv2d_transpose/Relu')
+ self.assertListEqual(list(output.eval().shape), expected_size)
+
+ def testOutputSizeWith2x2StrideTwoValidPaddingNCHW(self):
+ if tf.test.is_gpu_available():
+ with self.test_session(use_gpu=True) as sess:
+ num_filters = 1
+ input_size = [1, 1, 2, 2]
+ expected_size = [1, num_filters, 4, 4]
+
+ images = tf.random_uniform(input_size, seed=1)
+ output = tf.contrib.layers.conv2d_transpose(
+ images, num_filters, [2, 2], stride=[2, 2],
+ padding='VALID', data_format='NCHW')
+ sess.run(tf.initialize_all_variables())
+ self.assertEqual(output.op.name, 'Conv2d_transpose/Relu')
+ self.assertListEqual(list(output.eval().shape), expected_size)
+
+ def testOutputSizeWithStride2x1NCHW(self):
+ if tf.test.is_gpu_available():
+ with self.test_session(use_gpu=True) as sess:
+ num_filters = 1
+ input_size = [1, 1, 3, 2]
+ expected_size = [1, num_filters, 6, 5]
+
+ images = tf.random_uniform(input_size, seed=1)
+ output = tf.contrib.layers.conv2d_transpose(
+ images, num_filters, [2, 4], stride=[2, 1],
+ padding='VALID', data_format='NCHW')
+ sess.run(tf.initialize_all_variables())
+ self.assertEqual(output.op.name, 'Conv2d_transpose/Relu')
+ self.assertListEqual(list(output.eval().shape), expected_size)
+
+ def testOutputSizeWithStride2x4NCHW(self):
+ if tf.test.is_gpu_available():
+ with self.test_session(use_gpu=True) as sess:
+ num_filters = 1
+ input_size = [1, 1, 3, 2]
+ expected_size = [1, num_filters, 6, 8]
+
+ images = tf.random_uniform(input_size, seed=1)
+ output = tf.contrib.layers.conv2d_transpose(
+ images, num_filters, [2, 4], stride=[2, 4],
+ padding='VALID', data_format='NCHW')
+ sess.run(tf.initialize_all_variables())
+ self.assertEqual(output.op.name, 'Conv2d_transpose/Relu')
+ self.assertListEqual(list(output.eval().shape), expected_size)
+
+ def testOutputSizeWithStride2x5NCHW(self):
+ if tf.test.is_gpu_available():
+ with self.test_session(use_gpu=True) as sess:
+ num_filters = 1
+ input_size = [1, 1, 3, 2]
+ expected_size = [1, num_filters, 6, 10]
+
+ images = tf.random_uniform(input_size, seed=1)
+ output = tf.contrib.layers.conv2d_transpose(
+ images, num_filters, [2, 4], stride=[2, 5],
+ padding='VALID', data_format='NCHW')
+ sess.run(tf.initialize_all_variables())
+ self.assertEqual(output.op.name, 'Conv2d_transpose/Relu')
+ self.assertListEqual(list(output.eval().shape), expected_size)
+
+
def testOutputSizeWithStrideOneSamePadding(self):
num_filters = 32
input_size = [5, 10, 12, 3]
diff --git a/tensorflow/contrib/learn/python/learn/learn_io/graph_io_test.py b/tensorflow/contrib/learn/python/learn/learn_io/graph_io_test.py
index 2924fd6..8491bb7 100644
--- a/tensorflow/contrib/learn/python/learn/learn_io/graph_io_test.py
+++ b/tensorflow/contrib/learn/python/learn/learn_io/graph_io_test.py
@@ -244,7 +244,7 @@
session.run(tf.initialize_local_variables())
coord = tf.train.Coordinator()
- tf.train.start_queue_runners(session, coord=coord)
+ threads = tf.train.start_queue_runners(session, coord=coord)
self.assertAllEqual(session.run(inputs), [b"ABC"])
self.assertAllEqual(session.run(inputs), [b"DEF"])
@@ -253,6 +253,7 @@
session.run(inputs)
coord.request_stop()
+ coord.join(threads)
def test_read_keyed_batch_features_mutual_exclusive_args(self):
filename = self._create_temp_file("abcde")
@@ -307,6 +308,7 @@
coord.request_stop()
coord.join(threads)
+
parsed_records = [item for sublist in [d["sequence"] for d in data]
for item in sublist]
# Check that the number of records matches expected and all records
@@ -331,7 +333,7 @@
session.run(tf.initialize_local_variables())
coord = tf.train.Coordinator()
- tf.train.start_queue_runners(session, coord=coord)
+ threads = tf.train.start_queue_runners(session, coord=coord)
self.assertEqual("%s:1" % name, inputs.name)
file_name_queue_name = "%s/file_name_queue" % name
@@ -352,6 +354,7 @@
session.run(inputs)
coord.request_stop()
+ coord.join(threads)
def test_read_text_lines_multifile_with_shared_queue(self):
gfile.Glob = self._orig_glob
@@ -375,7 +378,7 @@
session.run(tf.initialize_local_variables())
coord = tf.train.Coordinator()
- tf.train.start_queue_runners(session, coord=coord)
+ threads = tf.train.start_queue_runners(session, coord=coord)
self.assertEqual("%s:1" % name, inputs.name)
shared_file_name_queue_name = "%s/file_name_queue" % name
@@ -398,6 +401,7 @@
session.run(inputs)
coord.request_stop()
+ coord.join(threads)
def _get_qr(self, name):
for qr in ops.get_collection(ops.GraphKeys.QUEUE_RUNNERS):
@@ -490,7 +494,7 @@
session.run(tf.initialize_local_variables())
coord = tf.train.Coordinator()
- tf.train.start_queue_runners(session, coord=coord)
+ threads = tf.train.start_queue_runners(session, coord=coord)
self.assertAllEqual(session.run(inputs), [b"A", b"B", b"C"])
self.assertAllEqual(session.run(inputs), [b"D", b"E"])
@@ -498,6 +502,7 @@
session.run(inputs)
coord.request_stop()
+ coord.join(threads)
def test_keyed_read_text_lines(self):
gfile.Glob = self._orig_glob
@@ -517,7 +522,7 @@
session.run(tf.initialize_local_variables())
coord = tf.train.Coordinator()
- tf.train.start_queue_runners(session, coord=coord)
+ threads = tf.train.start_queue_runners(session, coord=coord)
self.assertAllEqual(session.run([keys, inputs]),
[[filename.encode("utf-8") + b":1"], [b"ABC"]])
@@ -529,6 +534,7 @@
session.run(inputs)
coord.request_stop()
+ coord.join(threads)
def test_keyed_parse_json(self):
gfile.Glob = self._orig_glob
@@ -557,7 +563,7 @@
session.run(tf.initialize_local_variables())
coord = tf.train.Coordinator()
- tf.train.start_queue_runners(session, coord=coord)
+ threads = tf.train.start_queue_runners(session, coord=coord)
key, age = session.run([keys, inputs["age"]])
self.assertAllEqual(age, [[0]])
@@ -572,6 +578,7 @@
session.run(inputs)
coord.request_stop()
+ coord.join(threads)
if __name__ == "__main__":
diff --git a/tensorflow/contrib/learn/python/learn/utils/export_test.py b/tensorflow/contrib/learn/python/learn/utils/export_test.py
index 0f1c7e6..329a486 100644
--- a/tensorflow/contrib/learn/python/learn/utils/export_test.py
+++ b/tensorflow/contrib/learn/python/learn/utils/export_test.py
@@ -21,6 +21,7 @@
import os
import random
+import six
import tempfile
import numpy as np
@@ -63,8 +64,8 @@
# Only the written checkpoints are exported.
self.assertTrue(tf.gfile.Exists(export_dir + '00000001/export'))
self.assertTrue(tf.gfile.Exists(export_dir + '00000010/export'))
- self.assertEquals(export_monitor.last_export_dir, os.path.join(export_dir,
- '00000010'))
+ self.assertEquals(export_monitor.last_export_dir,
+ six.b(os.path.join(export_dir, '00000010')))
# Validate the signature
signature = self._get_default_signature(export_dir + '00000010/export.meta')
self.assertTrue(signature.HasField('regression_signature'))
@@ -86,8 +87,8 @@
# Only the written checkpoints are exported.
self.assertTrue(tf.gfile.Exists(export_dir + '00000001/export'))
self.assertTrue(tf.gfile.Exists(export_dir + '00000010/export'))
- self.assertEquals(export_monitor.last_export_dir, os.path.join(export_dir,
- '00000010'))
+ self.assertEquals(export_monitor.last_export_dir,
+ six.b(os.path.join(export_dir, '00000010')))
# Validate the signature
signature = self._get_default_signature(export_dir + '00000010/export.meta')
self.assertTrue(signature.HasField('generic_signature'))
diff --git a/tensorflow/core/common_runtime/bfc_allocator.h b/tensorflow/core/common_runtime/bfc_allocator.h
index c13f67f..0b528cb 100644
--- a/tensorflow/core/common_runtime/bfc_allocator.h
+++ b/tensorflow/core/common_runtime/bfc_allocator.h
@@ -351,6 +351,10 @@
inline int Log2FloorNonZero(uint64 n) {
#if defined(__GNUC__)
return 63 ^ __builtin_clzll(n);
+#elif defined(PLATFORM_WINDOWS)
+ unsigned long index;
+ _BitScanReverse64(&index, n);
+ return index;
#else
int r = 0;
while (n > 0) {
diff --git a/tensorflow/core/common_runtime/gpu/gpu_device.cc b/tensorflow/core/common_runtime/gpu/gpu_device.cc
index 30f1a28..e9c48a3 100644
--- a/tensorflow/core/common_runtime/gpu/gpu_device.cc
+++ b/tensorflow/core/common_runtime/gpu/gpu_device.cc
@@ -873,7 +873,9 @@
if (visible_device_list.empty()) {
visible_gpu_order.resize(gpu_manager->VisibleDeviceCount());
// By default, visible to virtual mapping is unchanged.
- std::iota(visible_gpu_order.begin(), visible_gpu_order.end(), 0);
+ int deviceNo = 0;
+ std::generate(visible_gpu_order.begin(), visible_gpu_order.end(),
+ [&deviceNo]{ return deviceNo++; });
} else {
std::vector<string> order_str = str_util::Split(visible_device_list, ',');
for (int i = 0; i < order_str.size(); ++i) {
diff --git a/tensorflow/core/common_runtime/gpu/gpu_tracer.cc b/tensorflow/core/common_runtime/gpu/gpu_tracer.cc
index 82d8b71..ee93b19 100644
--- a/tensorflow/core/common_runtime/gpu/gpu_tracer.cc
+++ b/tensorflow/core/common_runtime/gpu/gpu_tracer.cc
@@ -254,6 +254,10 @@
return manager;
}
+#ifdef _MSC_VER
+#define __thread __declspec(thread)
+#endif
+
// TODO(pbar) Move this to platform specific header file?
// Static thread local variable for POD types.
#define TF_STATIC_THREAD_LOCAL_POD(_Type_, _var_) \
diff --git a/tensorflow/core/common_runtime/gpu/pool_allocator.cc b/tensorflow/core/common_runtime/gpu/pool_allocator.cc
index e0362b3..700ac34 100644
--- a/tensorflow/core/common_runtime/gpu/pool_allocator.cc
+++ b/tensorflow/core/common_runtime/gpu/pool_allocator.cc
@@ -16,8 +16,10 @@
#include "tensorflow/core/common_runtime/gpu/pool_allocator.h"
#include <errno.h>
+#ifndef _MSC_VER
#include <strings.h>
#include <sys/mman.h> // for munmap
+#endif
#include <map>
#include <utility>
diff --git a/tensorflow/core/common_runtime/gpu/process_state.cc b/tensorflow/core/common_runtime/gpu/process_state.cc
index d7e72df..ea9b42f 100644
--- a/tensorflow/core/common_runtime/gpu/process_state.cc
+++ b/tensorflow/core/common_runtime/gpu/process_state.cc
@@ -126,7 +126,7 @@
gpu::StreamExecutor* se =
gpu_platform->ExecutorForDevice(gpu_id).ValueOrDie();
int bus_id = se->GetDeviceDescription().numa_node();
- if (bus_id < static_cast<int64>(gpu_visitors_.size())) {
+ if (bus_id >= 0 && bus_id < static_cast<int64>(gpu_visitors_.size())) {
for (auto v : gpu_visitors_[bus_id]) {
gpu_allocators_[gpu_id]->AddAllocVisitor(v);
}
diff --git a/tensorflow/core/framework/allocator.h b/tensorflow/core/framework/allocator.h
index 35e009c..4f8eb04 100644
--- a/tensorflow/core/framework/allocator.h
+++ b/tensorflow/core/framework/allocator.h
@@ -152,7 +152,7 @@
// allocated by this allocator.
virtual size_t RequestedSize(void* ptr) {
CHECK(false) << "allocator doesn't track sizes";
- return 0;
+ return size_t(0);
}
// Returns the allocated size of the buffer at 'ptr' if known,
diff --git a/tensorflow/core/framework/device_base.h b/tensorflow/core/framework/device_base.h
index acba116..8f0075d 100644
--- a/tensorflow/core/framework/device_base.h
+++ b/tensorflow/core/framework/device_base.h
@@ -149,6 +149,7 @@
// attributes requested. See allocator.h for more details.
virtual Allocator* GetAllocator(AllocatorAttributes /*attr*/) {
LOG(FATAL) << "GetAllocator() is not implemented.";
+ return nullptr;
}
// Return the Allocator implementation to use based on the allocator
@@ -180,6 +181,8 @@
virtual const DeviceAttributes& attributes() const {
LOG(FATAL) << "Device does not implement attributes()";
+ static DeviceAttributes dummy;
+ return dummy;
}
// Materializes the given TensorProto into 'tensor' stored in Device
diff --git a/tensorflow/core/framework/tensor_test.cc b/tensorflow/core/framework/tensor_test.cc
index 0776a1c..4d8d378 100644
--- a/tensorflow/core/framework/tensor_test.cc
+++ b/tensorflow/core/framework/tensor_test.cc
@@ -349,6 +349,15 @@
TEST(Tensor_Scalar, Basics) {
{
+ Tensor t(DT_BOOL, TensorShape({}));
+ EXPECT_EQ(1, t.NumElements());
+ auto Tt = t.scalar<bool>();
+ EXPECT_EQ(1, Tt.size());
+ EXPECT_EQ(0, Tt.rank());
+ t.scalar<bool>()() = true;
+ EXPECT_TRUE(Tt());
+ }
+ {
Tensor t(DT_FLOAT, TensorShape({}));
EXPECT_EQ(1, t.NumElements());
auto Tt = t.scalar<float>();
diff --git a/tensorflow/core/kernels/cwise_op_gpu_select.cu.cc b/tensorflow/core/kernels/cwise_op_gpu_select.cu.cc
index d71fdac..a54dbdf 100644
--- a/tensorflow/core/kernels/cwise_op_gpu_select.cu.cc
+++ b/tensorflow/core/kernels/cwise_op_gpu_select.cu.cc
@@ -16,6 +16,7 @@
#if GOOGLE_CUDA
#include "tensorflow/core/kernels/cwise_ops_gpu_common.cu.h"
+#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
namespace tensorflow {
namespace functor {
@@ -32,6 +33,28 @@
};
template <typename T>
+struct SelectScalarFunctor<GPUDevice, T> {
+ void operator()(const GPUDevice& d, typename TTypes<T>::Flat out,
+ typename TTypes<bool>::ConstScalar cond,
+ typename TTypes<T>::ConstFlat then_flat,
+ typename TTypes<T>::ConstFlat else_flat) {
+
+#if !defined(EIGEN_HAS_INDEX_LIST)
+ Eigen::array<int, 1> rank1{1};
+#else
+ Eigen::IndexList<Eigen::type2index<1>> rank1;
+#endif
+ const int size = then_flat.dimension(0);
+ Eigen::array<int, 1> broadcast_dims{size};
+
+ To32Bit(out).device(d) = cond.reshape(rank1)
+ .broadcast(broadcast_dims)
+ .select(then_flat, else_flat);
+
+ }
+};
+
+template <typename T>
struct BatchSelectFunctor<GPUDevice, T> {
void operator()(const GPUDevice& d,
typename TTypes<T>::Matrix output_flat_outer_dims,
@@ -68,6 +91,7 @@
#define SELECT_FUNCTOR(T) \
template struct SelectFunctor<GPUDevice, T>; \
+ template struct SelectScalarFunctor<GPUDevice, T>; \
template struct BatchSelectFunctor<GPUDevice, T>;
SELECT_FUNCTOR(Eigen::half);
diff --git a/tensorflow/core/kernels/cwise_op_select.cc b/tensorflow/core/kernels/cwise_op_select.cc
index fbfde88..8160fb7 100644
--- a/tensorflow/core/kernels/cwise_op_select.cc
+++ b/tensorflow/core/kernels/cwise_op_select.cc
@@ -41,6 +41,11 @@
OP_REQUIRES_OK(ctx, ctx->input("t", &then));
OP_REQUIRES_OK(ctx, ctx->input("e", &else_));
+ if (TensorShapeUtils::IsScalar(cond->shape())){
+ ComputeScalar(ctx, cond, then, else_);
+ return;
+ }
+
bool broadcasting = (TensorShapeUtils::IsVector(cond->shape()) &&
!TensorShapeUtils::IsVector(then->shape()));
@@ -108,6 +113,25 @@
}
}
+ void ComputeScalar(OpKernelContext* ctx, const Tensor* cond,
+ const Tensor* then, const Tensor* else_) {
+ OP_REQUIRES(
+ ctx, then->shape().IsSameSize(else_->shape()),
+ errors::InvalidArgument(
+ "'then' and 'else' must have the same size. but received: ",
+ then->shape().DebugString(), " vs. ",
+ else_->shape().DebugString()));
+
+ Tensor* output = nullptr;
+ OP_REQUIRES_OK(ctx, ctx->allocate_output(0, then->shape(), &output));
+
+ if (output->NumElements() > 0) {
+ functor::SelectScalarFunctor<Device, T> func;
+ TTypes<bool>::ConstScalar cond_scalar = cond->scalar<bool>();
+ func(ctx->eigen_device<Device>(), output->flat<T>(), cond_scalar,
+ then->flat<T>(), else_->flat<T>());
+ }
+ }
private:
TF_DISALLOW_COPY_AND_ASSIGN(SelectOp);
};
@@ -152,6 +176,17 @@
}
};
+// CPU Specializations of Select functors with scalar
+template <typename T>
+struct SelectScalarFunctor<CPUDevice, T> {
+ void operator()(const CPUDevice& d, typename TTypes<T>::Flat out,
+ TTypes<bool>::ConstScalar cond,
+ typename TTypes<T>::ConstFlat then_flat,
+ typename TTypes<T>::ConstFlat else_flat) {
+ out.device(d) = cond() ? then_flat : else_flat;
+ }
+};
+
template <typename T>
struct BatchSelectFunctor<CPUDevice, T> {
void operator()(const CPUDevice& d,
diff --git a/tensorflow/core/kernels/cwise_ops.h b/tensorflow/core/kernels/cwise_ops.h
index 2a77376..572a729 100644
--- a/tensorflow/core/kernels/cwise_ops.h
+++ b/tensorflow/core/kernels/cwise_ops.h
@@ -720,6 +720,14 @@
};
template <typename Device, typename T>
+struct SelectScalarFunctor {
+ void operator()(const Device& d, typename TTypes<T>::Flat out,
+ typename TTypes<bool>::ConstScalar cond,
+ typename TTypes<T>::ConstFlat then_flat,
+ typename TTypes<T>::ConstFlat else_flat);
+};
+
+template <typename Device, typename T>
struct BatchSelectFunctor {
void operator()(const Device& d,
typename TTypes<T>::Matrix output_flat_outer_dims,
diff --git a/tensorflow/core/kernels/depthwise_conv_op_gpu.cu.cc b/tensorflow/core/kernels/depthwise_conv_op_gpu.cu.cc
index 0acf82c..b256d24 100644
--- a/tensorflow/core/kernels/depthwise_conv_op_gpu.cu.cc
+++ b/tensorflow/core/kernels/depthwise_conv_op_gpu.cu.cc
@@ -21,7 +21,11 @@
#include "tensorflow/core/platform/types.h"
#include "tensorflow/core/util/cuda_kernel_helper.h"
+#if !defined(_MSC_VER)
#define UNROLL _Pragma("unroll")
+#else
+#define UNROLL
+#endif
namespace tensorflow {
diff --git a/tensorflow/core/kernels/matrix_triangular_solve_op.cc b/tensorflow/core/kernels/matrix_triangular_solve_op.cc
index 09f75f2..5f30a95 100644
--- a/tensorflow/core/kernels/matrix_triangular_solve_op.cc
+++ b/tensorflow/core/kernels/matrix_triangular_solve_op.cc
@@ -25,8 +25,25 @@
#include "tensorflow/core/platform/macros.h"
#include "tensorflow/core/platform/types.h"
+#if GOOGLE_CUDA
+#include "tensorflow/core/platform/stream_executor.h"
+#endif // GOOGLE_CUDA
+
namespace tensorflow {
+#if GOOGLE_CUDA
+namespace {
+template <typename Scalar>
+perftools::gputools::DeviceMemory<Scalar> AsDeviceMemory(
+ const Scalar* cuda_memory) {
+ perftools::gputools::DeviceMemoryBase wrapped(
+ const_cast<Scalar*>(cuda_memory));
+ perftools::gputools::DeviceMemory<Scalar> typed(wrapped);
+ return typed;
+}
+} // namespace
+#endif // GOOGLE_CUDA
+
template <class Scalar>
class MatrixTriangularSolveOp : public LinearAlgebraOp<Scalar> {
public:
@@ -60,7 +77,9 @@
int64 GetCostPerUnit(const TensorShapes& input_matrix_shapes) const final {
double rows = static_cast<double>(input_matrix_shapes[0].dim_size(0));
double num_rhss = static_cast<double>(input_matrix_shapes[1].dim_size(1));
- double cost = rows * rows * num_rhss;
+ double cost = rows * rows * num_rhss *
+ (Eigen::TensorOpCost::AddCost<Scalar>() +
+ Eigen::TensorOpCost::MulCost<Scalar>());
return cost >= static_cast<double>(kint64max) ? kint64max
: static_cast<int64>(cost);
}
@@ -103,6 +122,121 @@
TF_DISALLOW_COPY_AND_ASSIGN(MatrixTriangularSolveOp);
};
+
+#ifdef GOOGLE_CUDA
+template <class Scalar>
+class MatrixTriangularSolveOpGPU : public LinearAlgebraOp<Scalar> {
+ public:
+ typedef LinearAlgebraOp<Scalar> Base;
+
+ explicit MatrixTriangularSolveOpGPU(OpKernelConstruction* context)
+ : Base(context), lower_(true), adjoint_(false) {
+ OP_REQUIRES_OK(context, context->GetAttr("lower", &lower_));
+ OP_REQUIRES_OK(context, context->GetAttr("adjoint", &adjoint_));
+ }
+
+ using TensorShapes = typename Base::TensorShapes;
+ using Matrix = typename Base::Matrix;
+ using MatrixMap = typename Base::MatrixMap;
+ using MatrixMaps = typename Base::MatrixMaps;
+ using ConstMatrixMap = typename Base::ConstMatrixMap;
+ using ConstMatrixMaps = typename Base::ConstMatrixMaps;
+
+ virtual void ValidateInputMatrixShapes(
+ OpKernelContext* context,
+ const TensorShapes& input_matrix_shapes) const final {
+ Base::ValidateSquareSolver(context, input_matrix_shapes);
+ }
+
+ TensorShapes GetOutputMatrixShapes(
+ const TensorShapes& input_matrix_shapes) const final {
+ return TensorShapes({TensorShape({input_matrix_shapes[0].dim_size(1),
+ input_matrix_shapes[1].dim_size(1)})});
+ }
+
+ int64 GetCostPerUnit(const TensorShapes& input_matrix_shapes) const final {
+ double rows = static_cast<double>(input_matrix_shapes[0].dim_size(0));
+ double num_rhss = static_cast<double>(input_matrix_shapes[1].dim_size(1));
+ double cost = rows * rows * num_rhss *
+ (Eigen::TensorOpCost::AddCost<Scalar>() +
+ Eigen::TensorOpCost::MulCost<Scalar>());
+ return cost >= static_cast<double>(kint64max) ? kint64max
+ : static_cast<int64>(cost);
+ }
+
+ void ComputeMatrix(OpKernelContext* context, const ConstMatrixMaps& inputs,
+ MatrixMaps* outputs) final {
+ const ConstMatrixMap& matrix = inputs[0];
+ const ConstMatrixMap& rhs = inputs[1];
+ MatrixMap& output = outputs->at(0);
+
+ if (matrix.rows() == 0 || rhs.cols() == 0) {
+ // To be consistent with the MatrixInverse op, we define the solution for
+ // an empty set of equation as the empty matrix.
+ return;
+ }
+
+ auto matrix_ptr = AsDeviceMemory(matrix.data());
+ auto rhs_ptr = AsDeviceMemory(rhs.data());
+ auto out_ptr = AsDeviceMemory(output.data());
+
+ auto* stream = context->op_device_context()->stream();
+ uint64 rhs_elems = rhs.rows() * rhs.cols();
+ bool copy_status =
+ stream->ThenMemcpyD2D(&out_ptr, rhs_ptr, sizeof(Scalar) * rhs_elems)
+ .ok();
+ if (!copy_status) {
+ context->SetStatus(
+ errors::Internal("Failed to copy rhs into output before solve"));
+ }
+
+ // Cublas does
+ // output = matrix \ rhs
+ // where matrix, rhs and output are assumed to be in column major.
+ // We want the output to be in row-major, so we can compute
+ // output' = rhs' / matrix' (' stands for transpose)
+ // Upper/lower needs to be swapped for this.
+
+ perftools::gputools::blas::UpperLower upper_lower_matrix;
+ perftools::gputools::blas::Transpose transpose_matrix;
+ if (lower_) {
+ upper_lower_matrix = perftools::gputools::blas::UpperLower::kUpper;
+ } else {
+ upper_lower_matrix = perftools::gputools::blas::UpperLower::kLower;
+ }
+ if (adjoint_) {
+ transpose_matrix = perftools::gputools::blas::Transpose::kTranspose;
+ } else {
+ transpose_matrix = perftools::gputools::blas::Transpose::kNoTranspose;
+ }
+ uint64 leading_dim_matrix = matrix.cols();
+ uint64 leading_dim_output = output.cols();
+ uint64 colmajor_rows = output.cols();
+ uint64 colmajor_cols = output.rows();
+ bool blas_launch_status =
+ stream
+ ->ThenBlasTrsm(perftools::gputools::blas::Side::kRight /*side*/,
+ upper_lower_matrix /*uplo*/,
+ transpose_matrix /*trans*/,
+ perftools::gputools::blas::Diagonal::kNonUnit /*diag*/,
+ colmajor_rows /*m*/, colmajor_cols /*n*/,
+ Scalar(1.0) /*alpha*/,
+ matrix_ptr, leading_dim_matrix /*lda*/,
+ &out_ptr, leading_dim_output /*ldb*/)
+ .ok();
+ if (!blas_launch_status) {
+ context->SetStatus(errors::Internal("Blas TRSM launch failed"));
+ }
+ }
+
+ private:
+ bool lower_;
+ bool adjoint_;
+
+ TF_DISALLOW_COPY_AND_ASSIGN(MatrixTriangularSolveOpGPU);
+};
+#endif // GOOGLE_CUDA
+
REGISTER_LINALG_OP("MatrixTriangularSolve", (MatrixTriangularSolveOp<float>),
float);
REGISTER_LINALG_OP("MatrixTriangularSolve", (MatrixTriangularSolveOp<double>),
@@ -112,4 +246,30 @@
REGISTER_LINALG_OP("BatchMatrixTriangularSolve",
(MatrixTriangularSolveOp<double>), double);
+#ifdef GOOGLE_CUDA
+REGISTER_KERNEL_BUILDER(
+ Name("MatrixTriangularSolve")
+ .Device(DEVICE_GPU)
+ .TypeConstraint<float>("T"),
+ MatrixTriangularSolveOpGPU<float>);
+
+REGISTER_KERNEL_BUILDER(
+ Name("MatrixTriangularSolve")
+ .Device(DEVICE_GPU)
+ .TypeConstraint<double>("T"),
+ MatrixTriangularSolveOpGPU<double>);
+
+REGISTER_KERNEL_BUILDER(
+ Name("BatchMatrixTriangularSolve")
+ .Device(DEVICE_GPU)
+ .TypeConstraint<float>("T"),
+ MatrixTriangularSolveOpGPU<float>);
+
+REGISTER_KERNEL_BUILDER(
+ Name("BatchMatrixTriangularSolve")
+ .Device(DEVICE_GPU)
+ .TypeConstraint<double>("T"),
+ MatrixTriangularSolveOpGPU<double>);
+#endif //GOOGLE_CUDA
+
} // namespace tensorflow
diff --git a/tensorflow/core/kernels/range_sampler.h b/tensorflow/core/kernels/range_sampler.h
index 1372975..3010666 100644
--- a/tensorflow/core/kernels/range_sampler.h
+++ b/tensorflow/core/kernels/range_sampler.h
@@ -115,10 +115,12 @@
int64 Sample(random::SimplePhilox* rnd) const override {
LOG(FATAL) << "Should not be called";
+ return 0;
}
float Probability(int64 value) const override {
LOG(FATAL) << "Should not be called";
+ return 0;
}
void SampleBatchGetExpectedCountAvoid(
diff --git a/tensorflow/core/lib/io/path.cc b/tensorflow/core/lib/io/path.cc
index c59d6d1..de49d07 100644
--- a/tensorflow/core/lib/io/path.cc
+++ b/tensorflow/core/lib/io/path.cc
@@ -55,7 +55,10 @@
// the first part of the output.
std::pair<StringPiece, StringPiece> SplitPath(StringPiece path) {
auto pos = path.rfind('/');
-
+#ifdef PLATFORM_WINDOWS
+ if (pos == StringPiece::npos)
+ pos = path.rfind('\\');
+#endif
// Handle the case with no '/' in 'path'.
if (pos == StringPiece::npos)
return std::make_pair(StringPiece(path.data(), 0), path);
diff --git a/tensorflow/core/ops/math_ops.cc b/tensorflow/core/ops/math_ops.cc
index cf39d2f..8d3d931 100644
--- a/tensorflow/core/ops/math_ops.cc
+++ b/tensorflow/core/ops/math_ops.cc
@@ -913,7 +913,8 @@
.SetShapeFn([](InferenceContext* c) {
// The inputs 'then' and 'else' must have the same shape.
ShapeHandle data = c->input(1);
- TF_RETURN_IF_ERROR(c->Merge(data, c->input(2), &data));
+ ShapeHandle other = c->input(2);
+ TF_RETURN_IF_ERROR(c->Merge(data, other, &data));
// The input 'cond' must either have the same shape as 'then' and
// 'else', or be a vector if 'then' and 'else' are at least vectors.
@@ -929,30 +930,49 @@
const int32 cond_rank = c->Rank(cond);
const int32 data_rank = c->Rank(data);
- if (cond_rank != 1) {
- // If the rank of 'cond' is != 1, the shape must match 'then' and 'else'
- TF_RETURN_IF_ERROR(c->Merge(data, cond, &data));
+ if (cond_rank == 0){
+ // The rank of 'cond' is a scalar.
+ // t and e can have any shape.
+ c->set_output(0, data);
+ return Status::OK();
}
- if (data_rank != 0) {
- // If then and else are not scalars, then cond must be at least
- // a vector, and its first value must match that of 'else'
- TF_RETURN_IF_ERROR(c->WithRankAtLeast(cond, 1, &cond));
- if (cond_rank == 1) {
- TF_RETURN_IF_ERROR(c->Merge(cond, c->Vector(c->Dim(data, 0)), &cond));
- }
+
+ if (cond_rank != 1) {
+ // If 'cond' is not a vector, and not a scalar,
+ // then shape must match 'then' and 'else'
+ TF_RETURN_IF_ERROR(c->Merge(data, cond, &data));
+ c->set_output(0, data);
+ return Status::OK();
+ }
+
+ if (data_rank == 0) {
+ // if 'then' and 'else' are scalar also the cond must be
+ TF_RETURN_IF_ERROR(c->Merge(data, cond, &data));
+ c->set_output(0, data);
+ return Status::OK();
+ }
+
+ if (cond_rank == 1) {
+ // if the cond is a vector and the 'then' is not a scalar,
+ // the first dimension of 'then' and 'else'
+ TF_RETURN_IF_ERROR(c->Merge(cond, c->Vector(c->Dim(data, 0)), &cond));
+ c->set_output(0, data);
+ return Status::OK();
}
c->set_output(0, data);
return Status::OK();
- })
+ })
.Doc(R"doc(
Selects elements from `t` or `e`, depending on `condition`.
-The `t`, and `e` tensors must all have the same shape,
-and the output will also have that shape. The `condition` tensor
-must be a scalar if `t` and `e` are scalars. If `t` and `e` are vectors
-or higher rank, then `condition` must be either a vector with size
-matching the first dimension of `t`, or must have the same shape as `t`.
+The `t`, and `e` tensors must all have the same shape, and the
+output will also have that shape.
+
+The `condition` tensor must be a scalar if `t` and `e` are scalars.
+If `t` and `e` are vectors or higher rank, then `condition` must be either a
+scalar, a vector with size matching the first dimension of `t`, or must have
+the same shape as `t`.
The `condition` tensor acts as a mask that chooses, based on the value at each
element, whether the corresponding element / row in the output should be
diff --git a/tensorflow/core/ops/math_ops_test.cc b/tensorflow/core/ops/math_ops_test.cc
index edcd09a..79ae187 100644
--- a/tensorflow/core/ops/math_ops_test.cc
+++ b/tensorflow/core/ops/math_ops_test.cc
@@ -188,7 +188,10 @@
ShapeInferenceTestOp op("Select");
INFER_OK(op, "?;?;?", "in1|in2");
+ // scalar case
+ INFER_OK(op, "[];[1];?", "in1");
INFER_OK(op, "[];?;?", "in1|in2");
+
INFER_OK(op, "[1];?;?",
"in1|in2"); // When cond is vector, t/e may not match it.
INFER_OK(op, "[1,2];?;?", "in1|in2?");
@@ -200,8 +203,8 @@
INFER_OK(op, "?;[1,2];?", "in1");
INFER_OK(op, "?;?;[1,2]", "in2");
- INFER_OK(op, "[1];[];?", "in1");
- INFER_ERROR("Shapes must be equal rank, but are 1 and 0", op, "[];[1];?");
+ INFER_ERROR("Shapes must be equal rank, but are 0 and 1", op, "[1];[];?");
+ INFER_ERROR("Shapes must be equal rank, but are 1 and 2", op, "[];[1];[1,2]");
INFER_ERROR("Shapes must be equal rank, but are 1 and 2", op, "[1,2];[1];?");
INFER_OK(op, "[2];[?];[?]", "in1|in2");
diff --git a/tensorflow/core/platform/default/gpu/cupti_wrapper.h b/tensorflow/core/platform/default/gpu/cupti_wrapper.h
index e482f86..38e01ce 100644
--- a/tensorflow/core/platform/default/gpu/cupti_wrapper.h
+++ b/tensorflow/core/platform/default/gpu/cupti_wrapper.h
@@ -20,9 +20,11 @@
#include <stddef.h>
#include <stdint.h>
-
+#if defined(WIN32)
+#include "extras/CUPTI/include/cupti.h"
+#else
#include "cuda/extras/CUPTI/include/cupti.h"
-
+#endif
namespace perftools {
namespace gputools {
namespace profiler {
diff --git a/tensorflow/core/platform/env.h b/tensorflow/core/platform/env.h
index 1d6928b..3aaf3a5 100644
--- a/tensorflow/core/platform/env.h
+++ b/tensorflow/core/platform/env.h
@@ -261,6 +261,14 @@
virtual Status GetSymbolFromLibrary(void* handle, const char* symbol_name,
void** symbol) = 0;
+  // \brief Build the name of a dynamic library.
+  //
+  // "name" should be the name of the library.
+  // "version" should be the version of the library, or an empty string.
+  // Returns the name that LoadLibrary() can use.
+ virtual string FormatLibraryFileName(const string& name,
+ const string& version) = 0;
+
private:
std::unique_ptr<FileSystemRegistry> file_system_registry_;
TF_DISALLOW_COPY_AND_ASSIGN(Env);
@@ -318,7 +326,10 @@
void** symbol) override {
return target_->GetSymbolFromLibrary(handle, symbol_name, symbol);
}
-
+ string FormatLibraryFileName(const string& name,
+ const string& version) override {
+ return target_->FormatLibraryFileName(name, version);
+ }
private:
Env* target_;
};
diff --git a/tensorflow/core/platform/load_library.h b/tensorflow/core/platform/load_library.h
index 850ca9f..9038de2 100644
--- a/tensorflow/core/platform/load_library.h
+++ b/tensorflow/core/platform/load_library.h
@@ -25,8 +25,6 @@
Status LoadLibrary(const char* library_filename, void** handle);
Status GetSymbolFromLibrary(void* handle, const char* symbol_name,
void** symbol);
-// Return the filename of a dynamically linked library formatted according to
-// platform naming conventions
string FormatLibraryFileName(const string& name, const string& version);
} // namespace internal
diff --git a/tensorflow/core/platform/platform.h b/tensorflow/core/platform/platform.h
index 982a7b8..55d7954 100644
--- a/tensorflow/core/platform/platform.h
+++ b/tensorflow/core/platform/platform.h
@@ -20,7 +20,8 @@
// mobile.
#if !defined(PLATFORM_POSIX) && !defined(PLATFORM_GOOGLE) && \
- !defined(PLATFORM_POSIX_ANDROID) && !defined(PLATFORM_GOOGLE_ANDROID)
+ !defined(PLATFORM_POSIX_ANDROID) && !defined(PLATFORM_GOOGLE_ANDROID) && \
+ !defined(PLATFORM_WINDOWS)
// Choose which platform we are on.
#if defined(ANDROID) || defined(__ANDROID__)
diff --git a/tensorflow/core/platform/posix/env.cc b/tensorflow/core/platform/posix/env.cc
index 75e300a..2f9c8e4 100644
--- a/tensorflow/core/platform/posix/env.cc
+++ b/tensorflow/core/platform/posix/env.cc
@@ -119,6 +119,10 @@
return tensorflow::internal::GetSymbolFromLibrary(handle, symbol_name,
symbol);
}
+
+ string FormatLibraryFileName(const string& name, const string& version) {
+ return tensorflow::internal::FormatLibraryFileName(name, version);
+ }
};
} // namespace
diff --git a/tensorflow/core/platform/stacktrace.h b/tensorflow/core/platform/stacktrace.h
index beb97b6..3c953c9 100644
--- a/tensorflow/core/platform/stacktrace.h
+++ b/tensorflow/core/platform/stacktrace.h
@@ -22,7 +22,7 @@
#if defined(PLATFORM_GOOGLE)
#include "tensorflow/core/platform/google/stacktrace.h"
#elif defined(PLATFORM_POSIX) || defined(PLATFORM_POSIX_ANDROID) || \
- defined(PLATFORM_GOOGLE_ANDROID)
+ defined(PLATFORM_GOOGLE_ANDROID) || defined(PLATFORM_WINDOWS)
#include "tensorflow/core/platform/default/stacktrace.h"
#else
#error Define the appropriate PLATFORM_<foo> macro for this platform
diff --git a/tensorflow/core/platform/windows/env.cc b/tensorflow/core/platform/windows/env.cc
index 09edc10..41ce5d9 100644
--- a/tensorflow/core/platform/windows/env.cc
+++ b/tensorflow/core/platform/windows/env.cc
@@ -26,6 +26,7 @@
#include <thread>
#include <vector>
+#include <string>
#include "tensorflow/core/lib/core/error_codes.pb.h"
#include "tensorflow/core/platform/load_library.h"
@@ -52,7 +53,20 @@
class WindowsEnv : public Env {
public:
- WindowsEnv() {}
+ WindowsEnv()
+ : GetSystemTimePreciseAsFileTime_(NULL) {
+ // GetSystemTimePreciseAsFileTime function is only available in the latest
+ // versions of Windows. For that reason, we try to look it up in
+ // kernel32.dll at runtime and use an alternative option if the function
+ // is not available.
+ HMODULE module = GetModuleHandle("kernel32.dll");
+ if (module != NULL) {
+ auto func = (FnGetSystemTimePreciseAsFileTime)GetProcAddress(
+ module, "GetSystemTimePreciseAsFileTime");
+ GetSystemTimePreciseAsFileTime_ = func;
+ }
+ }
+
~WindowsEnv() override {
LOG(FATAL) << "Env::Default() must not be destroyed";
}
@@ -62,11 +76,32 @@
}
uint64 NowMicros() override {
- FILETIME temp;
- GetSystemTimeAsFileTime(&temp);
- uint64 now_ticks =
- (uint64)temp.dwLowDateTime + ((uint64)(temp.dwHighDateTime) << 32LL);
- return now_ticks / 10LL;
+ if (GetSystemTimePreciseAsFileTime_ != NULL) {
+ // GetSystemTimePreciseAsFileTime function is only available in latest
+ // versions of Windows, so we need to check for its existence here.
+ // All std::chrono clocks on Windows proved to return
+ // values that may repeat, which is not good enough for some uses.
+ constexpr int64_t kUnixEpochStartTicks = 116444736000000000i64;
+ constexpr int64_t kFtToMicroSec = 10;
+
+      // This interface needs to return system time and not
+      // just any microseconds because it is often used as an argument
+      // to TimedWait() on a condition variable.
+ FILETIME system_time;
+ GetSystemTimePreciseAsFileTime_(&system_time);
+
+ LARGE_INTEGER li;
+ li.LowPart = system_time.dwLowDateTime;
+ li.HighPart = system_time.dwHighDateTime;
+ // Subtract unix epoch start
+ li.QuadPart -= kUnixEpochStartTicks;
+ // Convert to microsecs
+ li.QuadPart /= kFtToMicroSec;
+ return li.QuadPart;
+ }
+ using namespace std::chrono;
+ return duration_cast<microseconds>(
+ system_clock::now().time_since_epoch()).count();
}
void SleepForMicroseconds(int64 micros) override { Sleep(micros / 1000); }
@@ -94,19 +129,53 @@
});
}
- Status LoadLibrary(const char* library_filename, void** handle) override {
- return errors::Unimplemented("WindowsEnv::LoadLibrary");
+ Status LoadLibrary(const char *library_filename, void** handle) override {
+ std::string file_name = library_filename;
+ std::replace(file_name.begin(), file_name.end(), '/', '\\');
+
+ HMODULE hModule = LoadLibraryEx(file_name.c_str(), NULL,
+ LOAD_WITH_ALTERED_SEARCH_PATH);
+ if (!hModule) {
+ return errors::NotFound(file_name + " not found");
+ }
+ *handle = hModule;
+ return Status::OK();
}
Status GetSymbolFromLibrary(void* handle, const char* symbol_name,
- void** symbol) override {
- return errors::Unimplemented("WindowsEnv::GetSymbolFromLibrary");
+ void** symbol) override {
+ FARPROC found_symbol;
+
+ found_symbol = GetProcAddress((HMODULE)handle, symbol_name);
+ if (found_symbol == NULL) {
+ return errors::NotFound(std::string(symbol_name) + " not found");
+ }
+ *symbol = (void **)found_symbol;
+ return Status::OK();
}
+
+ string FormatLibraryFileName(const string& name, const string& version)
+ override {
+ string filename;
+ if (version.size() == 0) {
+ filename = name + ".dll";
+ }
+ else {
+ filename = name + version + ".dll";
+ }
+ return filename;
+ }
+
+ private:
+ typedef VOID(WINAPI * FnGetSystemTimePreciseAsFileTime)(LPFILETIME);
+ FnGetSystemTimePreciseAsFileTime GetSystemTimePreciseAsFileTime_;
};
} // namespace
REGISTER_FILE_SYSTEM("", WindowsFileSystem);
+REGISTER_FILE_SYSTEM("file", LocalWinFileSystem);
+
Env* Env::Default() {
static Env* default_env = new WindowsEnv;
return default_env;
diff --git a/tensorflow/core/platform/windows/error.cc b/tensorflow/core/platform/windows/error.cc
new file mode 100644
index 0000000..39e941a
--- /dev/null
+++ b/tensorflow/core/platform/windows/error.cc
@@ -0,0 +1,33 @@
+/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/core/platform/windows/error.h"
+
+namespace tensorflow {
+namespace internal {
+
+std::string GetWindowsErrorMessage(DWORD err) {
+ LPSTR buffer = NULL;
+ DWORD flags = FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM |
+ FORMAT_MESSAGE_IGNORE_INSERTS;
+ FormatMessageA(flags, NULL, err, MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT),
+ reinterpret_cast<LPSTR>(&buffer), 0, NULL);
+ std::string message = buffer;
+ LocalFree(buffer);
+ return message;
+}
+
+} // namespace internal
+} // namespace tensorflow
diff --git a/tensorflow/core/platform/windows/error.h b/tensorflow/core/platform/windows/error.h
new file mode 100644
index 0000000..026e0d5
--- /dev/null
+++ b/tensorflow/core/platform/windows/error.h
@@ -0,0 +1,32 @@
+/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_CORE_PLATFORM_WINDOWS_ERROR_H_
+#define TENSORFLOW_CORE_PLATFORM_WINDOWS_ERROR_H_
+
+#include <string>
+
+#include <Windows.h>
+
+namespace tensorflow {
+namespace internal {
+
+std::string GetWindowsErrorMessage(DWORD err);
+
+}
+}
+
+#endif // TENSORFLOW_CORE_PLATFORM_WINDOWS_ERROR_H_
+
diff --git a/tensorflow/core/platform/windows/net.cc b/tensorflow/core/platform/windows/net.cc
index fbc0c39..46eb072 100644
--- a/tensorflow/core/platform/windows/net.cc
+++ b/tensorflow/core/platform/windows/net.cc
@@ -15,25 +15,27 @@
#include "tensorflow/core/platform/net.h"
-#include <cerrno>
#include <cstdlib>
#include <unordered_set>
#include <sys/types.h>
-#include <winsock.h>
+#include <winsock2.h>
-#include "tensorflow/core/lib/strings/strcat.h"
#include "tensorflow/core/platform/logging.h"
+#include "tensorflow/core/platform/windows/error.h"
#undef ERROR
+#pragma comment(lib,"Ws2_32.lib")
+
namespace tensorflow {
namespace internal {
namespace {
+
bool IsPortAvailable(int* port, bool is_tcp) {
const int protocol = is_tcp ? IPPROTO_TCP : 0;
- const int fd = socket(AF_INET, is_tcp ? SOCK_STREAM : SOCK_DGRAM, protocol);
+ SOCKET sock = socket(AF_INET, is_tcp ? SOCK_STREAM : SOCK_DGRAM, protocol);
struct sockaddr_in addr;
int addr_len = static_cast<int>(sizeof(addr));
@@ -41,17 +43,20 @@
CHECK_GE(*port, 0);
CHECK_LE(*port, 65535);
- if (fd < 0) {
- LOG(ERROR) << "socket() failed: " << strerror(errno);
+ if (sock == INVALID_SOCKET) {
+ LOG(ERROR) << "socket() failed: " <<
+ GetWindowsErrorMessage(WSAGetLastError());
return false;
}
- // SO_REUSEADDR lets us start up a server immediately after it exists.
- int one = 1;
- if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, (const char*)&one, sizeof(one)) <
- 0) {
- LOG(ERROR) << "setsockopt() failed: " << strerror(errno);
- closesocket(fd);
+ // SO_REUSEADDR lets us start up a server immediately after it exits.
+ const int one = 1;
+ int result = setsockopt(sock, SOL_SOCKET, SO_REUSEADDR,
+ reinterpret_cast<const char*>(&one), sizeof(one));
+ if (result == SOCKET_ERROR) {
+ LOG(ERROR) << "setsockopt() failed: " <<
+ GetWindowsErrorMessage(WSAGetLastError());
+ closesocket(sock);
return false;
}
@@ -59,18 +64,23 @@
addr.sin_family = AF_INET;
addr.sin_addr.s_addr = INADDR_ANY;
addr.sin_port = htons((uint16_t)*port);
- if (bind(fd, (struct sockaddr*)&addr, sizeof(addr)) < 0) {
- LOG(WARNING) << "bind(port=" << *port << ") failed: " << strerror(errno);
- closesocket(fd);
+ result = bind(sock, (struct sockaddr*)&addr, sizeof(addr));
+ if (result == SOCKET_ERROR) {
+ LOG(WARNING) << "bind(port=" << *port << ") failed: " <<
+ GetWindowsErrorMessage(WSAGetLastError());
+ closesocket(sock);
return false;
}
// Get the bound port number.
- if (getsockname(fd, (struct sockaddr*)&addr, &addr_len) < 0) {
- LOG(WARNING) << "getsockname() failed: " << strerror(errno);
- closesocket(fd);
+ result = getsockname(sock, (struct sockaddr*)&addr, &addr_len);
+ if (result == SOCKET_ERROR) {
+ LOG(WARNING) << "getsockname() failed: " <<
+ GetWindowsErrorMessage(WSAGetLastError());
+ closesocket(sock);
return false;
}
+
CHECK_LE(addr_len, sizeof(addr));
actual_port = ntohs(addr.sin_port);
CHECK_GT(actual_port, 0);
@@ -79,7 +89,8 @@
} else {
CHECK_EQ(*port, actual_port);
}
- closesocket(fd);
+
+ closesocket(sock);
return true;
}
@@ -89,6 +100,12 @@
} // namespace
int PickUnusedPortOrDie() {
+ WSADATA wsaData;
+ if (WSAStartup(MAKEWORD(2, 2), &wsaData) != NO_ERROR) {
+ LOG(ERROR) << "Error at WSAStartup()";
+ return false;
+ }
+
static std::unordered_set<int> chosen_ports;
// Type of port to first pick in the next iteration.
@@ -121,6 +138,7 @@
}
chosen_ports.insert(port);
+ WSACleanup();
return port;
}
diff --git a/tensorflow/core/platform/windows/port.cc b/tensorflow/core/platform/windows/port.cc
index 0721976..ee5be22 100644
--- a/tensorflow/core/platform/windows/port.cc
+++ b/tensorflow/core/platform/windows/port.cc
@@ -19,8 +19,8 @@
#ifdef SNAPPY
#include <snappy.h>
#endif
-#include <WinSock2.h>
-#pragma comment(lib, "Ws2_32.lib")
+
+#include <Windows.h>
#include "tensorflow/core/platform/cpu_info.h"
#include "tensorflow/core/platform/demangle.h"
@@ -37,10 +37,13 @@
void InitMain(const char* usage, int* argc, char*** argv) {}
string Hostname() {
- char hostname[1024];
- gethostname(hostname, sizeof hostname);
- hostname[sizeof hostname - 1] = 0;
- return string(hostname);
+ char name[1024];
+ DWORD name_size = sizeof(name);
+ name[0] = 0;
+ if (::GetComputerNameA(name, &name_size)) {
+ name[name_size] = 0;
+ }
+ return name;
}
int NumSchedulableCPUs() {
diff --git a/tensorflow/core/platform/windows/windows_file_system.cc b/tensorflow/core/platform/windows/windows_file_system.cc
index 44b26d9..714bb55 100644
--- a/tensorflow/core/platform/windows/windows_file_system.cc
+++ b/tensorflow/core/platform/windows/windows_file_system.cc
@@ -30,6 +30,7 @@
#include "tensorflow/core/platform/env.h"
#include "tensorflow/core/platform/logging.h"
#include "tensorflow/core/platform/posix/error.h"
+#include "tensorflow/core/platform/windows/error.h"
#include "tensorflow/core/platform/windows/windows_file_system.h"
// TODO(mrry): Prevent this Windows.h #define from leaking out of our headers.
@@ -39,19 +40,71 @@
namespace {
+// RAII helpers for HANDLEs
+const auto CloseHandleFunc = [](HANDLE h) { ::CloseHandle(h); };
+typedef std::unique_ptr<void, decltype(CloseHandleFunc)> UniqueCloseHandlePtr;
+
+inline Status IOErrorFromWindowsError(const string& context, DWORD err) {
+ return IOError(
+ context + string(" : ") + internal::GetWindowsErrorMessage(err), err);
+}
+
+// PLEASE NOTE: hfile is expected to be an async handle
+// (i.e. opened with FILE_FLAG_OVERLAPPED)
+SSIZE_T pread(HANDLE hfile, char* src, size_t num_bytes, uint64_t offset) {
+ assert(num_bytes <= std::numeric_limits<DWORD>::max());
+ OVERLAPPED overlapped = {0};
+ ULARGE_INTEGER offset_union;
+ offset_union.QuadPart = offset;
+
+ overlapped.Offset = offset_union.LowPart;
+ overlapped.OffsetHigh = offset_union.HighPart;
+ overlapped.hEvent = ::CreateEvent(NULL, TRUE, FALSE, NULL);
+
+ if (NULL == overlapped.hEvent) {
+ return -1;
+ }
+
+ SSIZE_T result = 0;
+
+ unsigned long bytes_read = 0;
+ DWORD last_error = ERROR_SUCCESS;
+
+ BOOL read_result = ::ReadFile(hfile, src, static_cast<DWORD>(num_bytes),
+ &bytes_read, &overlapped);
+ if ((FALSE == read_result) &&
+ ((last_error = GetLastError()) != ERROR_IO_PENDING)) {
+ result = (last_error == ERROR_HANDLE_EOF) ? 0 : -1;
+ } else {
+ if (ERROR_IO_PENDING == last_error) { // Otherwise bytes_read already has the result.
+ BOOL overlapped_result = ::GetOverlappedResult(hfile, &overlapped,
+ &bytes_read, TRUE);
+ if (FALSE == overlapped_result) {
+ result = (::GetLastError() == ERROR_HANDLE_EOF) ? 0 : -1;
+ }
+ else {
+ result = bytes_read;
+ }
+ }
+ }
+
+ ::CloseHandle(overlapped.hEvent);
+
+ return result;
+}
+
// read() based random-access
class WindowsRandomAccessFile : public RandomAccessFile {
private:
string filename_;
- FILE* file_;
+ HANDLE hfile_;
public:
- WindowsRandomAccessFile(const string& fname, FILE* f)
- : filename_(fname), file_(f) {}
+ WindowsRandomAccessFile(const string& fname, HANDLE hfile)
+ : filename_(fname), hfile_(hfile) {}
~WindowsRandomAccessFile() override {
- if (file_ != NULL) {
- // Ignoring any potential errors
- fclose(file_);
+ if (hfile_ != NULL && hfile_ != INVALID_HANDLE_VALUE) {
+ ::CloseHandle(hfile_);
}
}
@@ -59,13 +112,10 @@
char* scratch) const override {
Status s;
char* dst = scratch;
- int seek_result = fseek(file_, offset, SEEK_SET);
- if (seek_result) {
- return IOError(filename_, errno);
- }
while (n > 0 && s.ok()) {
- size_t r = fread(dst, 1, n, file_);
+ SSIZE_T r = pread(hfile_, dst, n, offset);
if (r > 0) {
+ offset += r;
dst += r;
n -= r;
} else if (r == 0) {
@@ -84,104 +134,246 @@
class WindowsWritableFile : public WritableFile {
private:
string filename_;
- FILE* file_;
+ HANDLE hfile_;
public:
- WindowsWritableFile(const string& fname, FILE* f)
- : filename_(fname), file_(f) {}
+ WindowsWritableFile(const string& fname, HANDLE hFile)
+ : filename_(fname), hfile_(hFile) {}
~WindowsWritableFile() override {
- if (file_ != NULL) {
- // Ignoring any potential errors
- fclose(file_);
+ if (hfile_ != NULL && hfile_ != INVALID_HANDLE_VALUE) {
+ WindowsWritableFile::Close();
}
}
Status Append(const StringPiece& data) override {
- size_t r = fwrite(data.data(), 1, data.size(), file_);
- if (r != data.size()) {
- return IOError(filename_, errno);
+ DWORD bytes_written = 0;
+ DWORD data_size = static_cast<DWORD>(data.size());
+ BOOL write_result = ::WriteFile(hfile_, data.data(), data_size,
+ &bytes_written, NULL);
+ if (FALSE == write_result) {
+ return IOErrorFromWindowsError(
+ "Failed to WriteFile: " + filename_, ::GetLastError());
}
+
+ assert(size_t(bytes_written) == data.size());
return Status::OK();
}
Status Close() override {
- Status result;
- if (fclose(file_) != 0) {
- result = IOError(filename_, errno);
+ assert(INVALID_HANDLE_VALUE != hfile_);
+
+ Status result = Flush();
+ if (!result.ok()) {
+ return result;
}
- file_ = NULL;
- return result;
+
+ if (FALSE == ::CloseHandle(hfile_)) {
+ return IOErrorFromWindowsError(
+ "CloseHandle failed for: " + filename_, ::GetLastError());
+ }
+
+ hfile_ = INVALID_HANDLE_VALUE;
+ return Status::OK();
}
Status Flush() override {
- if (fflush(file_) != 0) {
- return IOError(filename_, errno);
+ if (FALSE == ::FlushFileBuffers(hfile_)) {
+ return IOErrorFromWindowsError(
+ "FlushFileBuffers failed for: " + filename_, ::GetLastError());
}
return Status::OK();
}
Status Sync() override {
- Status s;
- if (fflush(file_) != 0) {
- s = IOError(filename_, errno);
- }
- return s;
+ return Flush();
}
};
+class WinReadOnlyMemoryRegion : public ReadOnlyMemoryRegion {
+ private:
+ const std::string filename_;
+ HANDLE hfile_;
+ HANDLE hmap_;
+
+ const void* const address_;
+ const uint64 length_;
+
+ public:
+ WinReadOnlyMemoryRegion(const std::string& filename, HANDLE hfile,
+ HANDLE hmap, const void* address, uint64 length)
+ : filename_(filename), hfile_(hfile), hmap_(hmap), address_(address),
+ length_(length) {}
+
+ ~WinReadOnlyMemoryRegion() {
+ BOOL ret = ::UnmapViewOfFile(address_);
+ assert(ret);
+
+ ret = ::CloseHandle(hmap_);
+ assert(ret);
+
+ ret = ::CloseHandle(hfile_);
+ assert(ret);
+ }
+
+ const void* data() override { return address_; }
+ uint64 length() override { return length_; }
+};
+
} // namespace
Status WindowsFileSystem::NewRandomAccessFile(
const string& fname, std::unique_ptr<RandomAccessFile>* result) {
string translated_fname = TranslateName(fname);
result->reset();
- Status s;
- FILE* f = fopen(translated_fname.c_str(), "r");
- if (f == NULL) {
- s = IOError(fname, errno);
- } else {
- result->reset(new WindowsRandomAccessFile(translated_fname, f));
+
+ // Open the file for read-only random access
+ // Random access is to disable read-ahead as the system reads too much data
+ // Open in async mode which makes Windows allow more parallelism even
+ // if we need to do sync I/O on top of it.
+ DWORD file_flags = FILE_ATTRIBUTE_READONLY | FILE_FLAG_RANDOM_ACCESS |
+ FILE_FLAG_OVERLAPPED;
+  // Shared access is necessary for tests to pass;
+  // almost all tests would work, with the possible exception of fault_injection.
+ DWORD share_mode = FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE;
+
+ HANDLE hfile = ::CreateFileA(translated_fname.c_str(), GENERIC_READ,
+ share_mode, NULL, OPEN_EXISTING, file_flags,
+ NULL);
+
+ if (INVALID_HANDLE_VALUE == hfile) {
+ string context = "NewRandomAccessFile failed to Create/Open: " + fname;
+ return IOErrorFromWindowsError(context, ::GetLastError());
}
- return s;
+
+ result->reset(new WindowsRandomAccessFile(translated_fname, hfile));
+ return Status::OK();
}
Status WindowsFileSystem::NewWritableFile(
const string& fname, std::unique_ptr<WritableFile>* result) {
string translated_fname = TranslateName(fname);
- Status s;
- FILE* f = fopen(translated_fname.c_str(), "w");
- if (f == NULL) {
- result->reset();
- s = IOError(fname, errno);
- } else {
- result->reset(new WindowsWritableFile(translated_fname, f));
+ result->reset();
+
+ DWORD share_mode = FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE;
+ HANDLE hfile = ::CreateFileA(translated_fname.c_str(), GENERIC_WRITE,
+ share_mode, NULL, CREATE_ALWAYS,
+ FILE_ATTRIBUTE_NORMAL, NULL);
+
+ if (INVALID_HANDLE_VALUE == hfile) {
+ string context = "Failed to create a NewWriteableFile: " + fname;
+ return IOErrorFromWindowsError(context, ::GetLastError());
}
- return s;
+
+ result->reset(new WindowsWritableFile(translated_fname, hfile));
+ return Status::OK();
}
Status WindowsFileSystem::NewAppendableFile(
const string& fname, std::unique_ptr<WritableFile>* result) {
string translated_fname = TranslateName(fname);
- Status s;
- FILE* f = fopen(translated_fname.c_str(), "a");
- if (f == NULL) {
- result->reset();
- s = IOError(fname, errno);
- } else {
- result->reset(new WindowsWritableFile(translated_fname, f));
+ result->reset();
+
+ DWORD share_mode = FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE;
+ HANDLE hfile = ::CreateFileA(translated_fname.c_str(), GENERIC_WRITE,
+ share_mode, NULL, OPEN_ALWAYS,
+ FILE_ATTRIBUTE_NORMAL, NULL);
+
+ if (INVALID_HANDLE_VALUE == hfile) {
+ string context = "Failed to create a NewAppendableFile: " + fname;
+ return IOErrorFromWindowsError(context, ::GetLastError());
}
- return s;
+
+ UniqueCloseHandlePtr file_guard(hfile, CloseHandleFunc);
+
+ DWORD file_ptr = ::SetFilePointer(hfile, NULL, NULL, FILE_END);
+ if (INVALID_SET_FILE_POINTER == file_ptr) {
+ string context = "Failed to create a NewAppendableFile: " + fname;
+ return IOErrorFromWindowsError(context, ::GetLastError());
+ }
+
+ result->reset(new WindowsWritableFile(translated_fname, hfile));
+ file_guard.release();
+
+ return Status::OK();
}
Status WindowsFileSystem::NewReadOnlyMemoryRegionFromFile(
const string& fname, std::unique_ptr<ReadOnlyMemoryRegion>* result) {
- return errors::Unimplemented(
- "WindowsFileSystem::NewReadOnlyMemoryRegionFromFile");
+ string translated_fname = TranslateName(fname);
+ result->reset();
+ Status s = Status::OK();
+
+ // Open the file for read-only random access
+ DWORD file_flags = FILE_ATTRIBUTE_READONLY | FILE_FLAG_RANDOM_ACCESS;
+
+ // Open in async mode which makes Windows allow more parallelism even
+ // if we need to do sync I/O on top of it.
+ file_flags |= FILE_FLAG_OVERLAPPED;
+
+ DWORD share_mode = FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE;
+ HANDLE hfile = ::CreateFileA(translated_fname.c_str(), GENERIC_READ,
+ share_mode, NULL, OPEN_EXISTING, file_flags,
+ NULL);
+
+ if (INVALID_HANDLE_VALUE == hfile) {
+ return IOErrorFromWindowsError(
+ "NewReadOnlyMemoryRegionFromFile failed to Create/Open: " + fname,
+ ::GetLastError());
+ }
+
+ UniqueCloseHandlePtr file_guard(hfile, CloseHandleFunc);
+
+ // Use mmap when virtual address-space is plentiful.
+ uint64_t file_size;
+ s = GetFileSize(translated_fname, &file_size);
+ if (s.ok()) {
+ // Will not map empty files
+ if (file_size == 0) {
+ return IOError(
+ "NewReadOnlyMemoryRegionFromFile failed to map empty file: " + fname,
+ EINVAL);
+ }
+
+ HANDLE hmap = ::CreateFileMappingA(hfile, NULL, PAGE_READONLY,
+ 0, // Whole file at its present length
+ 0,
+ NULL); // Mapping name
+
+ if (!hmap) {
+ string context = "Failed to create file mapping for "
+ "NewReadOnlyMemoryRegionFromFile: " + fname;
+ return IOErrorFromWindowsError(context, ::GetLastError());
+ }
+
+ UniqueCloseHandlePtr map_guard(hmap, CloseHandleFunc);
+
+ const void* mapped_region = ::MapViewOfFileEx(
+ hmap, FILE_MAP_READ,
+ 0, // High DWORD of access start
+ 0, // Low DWORD
+ file_size,
+ NULL); // Let the OS choose the mapping
+
+ if (!mapped_region) {
+ string context = "Failed to MapViewOfFile for "
+ "NewReadOnlyMemoryRegionFromFile: " + fname;
+ return IOErrorFromWindowsError(context, ::GetLastError());
+ }
+
+ result->reset(new WinReadOnlyMemoryRegion(fname, hfile, hmap,
+ mapped_region, file_size));
+
+ map_guard.release();
+ file_guard.release();
+ }
+
+ return s;
}
bool WindowsFileSystem::FileExists(const string& fname) {
- return _access(TranslateName(fname).c_str(), 0) == 0;
+ constexpr int kOk = 0;
+ return _access(TranslateName(fname).c_str(), kOk) == 0;
}
Status WindowsFileSystem::GetChildren(const string& dir,
@@ -189,27 +381,39 @@
string translated_dir = TranslateName(dir);
result->clear();
+ string pattern = translated_dir;
+ if (!pattern.empty() && pattern.back() != '\\' && pattern.back() != '/') {
+ pattern += '\\*';
+ } else {
+ pattern += '*';
+ }
+
WIN32_FIND_DATA find_data;
- HANDLE find_handle = FindFirstFile(translated_dir.c_str(), &find_data);
+ HANDLE find_handle = ::FindFirstFileA(pattern.c_str(), &find_data);
if (find_handle == INVALID_HANDLE_VALUE) {
- // TODO(mrry): Convert to a more specific error.
- return errors::Unknown("Error code: ", GetLastError());
+ string context = "FindFirstFile failed for: " + translated_dir;
+ return IOErrorFromWindowsError(context, ::GetLastError());
}
- result->push_back(find_data.cFileName);
- while (FindNextFile(find_handle, &find_data)) {
- result->push_back(find_data.cFileName);
+
+ do {
+ const StringPiece basename = find_data.cFileName;
+ if (basename != "." && basename != "..") {
+ result->push_back(find_data.cFileName);
+ }
+ } while (::FindNextFileA(find_handle, &find_data));
+
+ if (!::FindClose(find_handle)) {
+ string context = "FindClose failed for: " + translated_dir;
+ return IOErrorFromWindowsError(context, ::GetLastError());
}
- if (!FindClose(find_handle)) {
- // TODO(mrry): Convert to a more specific error.
- return errors::Unknown("Error closing find handle: ", GetLastError());
- }
+
return Status::OK();
}
Status WindowsFileSystem::DeleteFile(const string& fname) {
Status result;
if (unlink(TranslateName(fname).c_str()) != 0) {
- result = IOError(fname, errno);
+ result = IOError("Failed to delete a file: " + fname, errno);
}
return result;
}
@@ -217,7 +421,7 @@
Status WindowsFileSystem::CreateDir(const string& name) {
Status result;
if (_mkdir(TranslateName(name).c_str()) != 0) {
- result = IOError(name, errno);
+ result = IOError("Failed to create a directory: " + name, errno);
}
return result;
}
@@ -225,42 +429,52 @@
Status WindowsFileSystem::DeleteDir(const string& name) {
Status result;
if (_rmdir(TranslateName(name).c_str()) != 0) {
- result = IOError(name, errno);
+ result = IOError("Failed to remove a directory: " + name, errno);
}
return result;
}
Status WindowsFileSystem::GetFileSize(const string& fname, uint64* size) {
- Status s;
- struct _stat sbuf;
- if (_stat(TranslateName(fname).c_str(), &sbuf) != 0) {
- *size = 0;
- s = IOError(fname, errno);
- } else {
- *size = sbuf.st_size;
+ string translated_fname = TranslateName(fname);
+ Status result;
+ WIN32_FILE_ATTRIBUTE_DATA attrs;
+ if (TRUE == ::GetFileAttributesExA(translated_fname.c_str(),
+ GetFileExInfoStandard, &attrs)) {
+ ULARGE_INTEGER file_size;
+ file_size.HighPart = attrs.nFileSizeHigh;
+ file_size.LowPart = attrs.nFileSizeLow;
+ *size = file_size.QuadPart;
}
- return s;
+ else {
+ string context = "Can not get size for: " + fname;
+ result = IOErrorFromWindowsError(context, ::GetLastError());
+ }
+ return result;
}
Status WindowsFileSystem::RenameFile(const string& src, const string& target) {
Status result;
- if (rename(TranslateName(src).c_str(), TranslateName(target).c_str()) != 0) {
- result = IOError(src, errno);
+ // rename() is not capable of replacing the existing file as on Linux
+ // so use OS API directly
+ if (!::MoveFileExA(TranslateName(src).c_str(), TranslateName(target).c_str(),
+ MOVEFILE_REPLACE_EXISTING)) {
+ string context(strings::StrCat("Failed to rename: ", src, " to: ", target));
+ result = IOErrorFromWindowsError(context, ::GetLastError());
}
return result;
}
Status WindowsFileSystem::Stat(const string& fname, FileStatistics* stat) {
- Status s;
+ Status result;
struct _stat sbuf;
if (_stat(TranslateName(fname).c_str(), &sbuf) != 0) {
- s = IOError(fname, errno);
+ result = IOError(fname, errno);
} else {
stat->mtime_nsec = sbuf.st_mtime * 1e9;
stat->length = sbuf.st_size;
stat->is_directory = PathIsDirectory(TranslateName(fname).c_str());
}
- return s;
+ return result;
}
} // namespace tensorflow
\ No newline at end of file
diff --git a/tensorflow/core/platform/windows/windows_file_system.h b/tensorflow/core/platform/windows/windows_file_system.h
index 68b391f..12b579b 100644
--- a/tensorflow/core/platform/windows/windows_file_system.h
+++ b/tensorflow/core/platform/windows/windows_file_system.h
@@ -64,7 +64,14 @@
}
};
-Status IOError(const string& context, int err_number);
+class LocalWinFileSystem : public WindowsFileSystem {
+public:
+ string TranslateName(const string& name) const override {
+ StringPiece scheme, host, path;
+ ParseURI(name, &scheme, &host, &path);
+ return path.ToString();
+ }
+};
} // namespace tensorflow
diff --git a/tensorflow/core/public/version.h b/tensorflow/core/public/version.h
index bab4572..1e8ae0b 100644
--- a/tensorflow/core/public/version.h
+++ b/tensorflow/core/public/version.h
@@ -20,7 +20,7 @@
#define TF_MAJOR_VERSION 0
#define TF_MINOR_VERSION 11
-#define TF_PATCH_VERSION 0rc0
+#define TF_PATCH_VERSION 0rc1
// TF_VERSION_SUFFIX is non-empty for pre-releases (e.g. "-alpha", "-alpha.1",
// "-beta", "-rc", "-rc.1")
diff --git a/tensorflow/examples/learn/README.md b/tensorflow/examples/learn/README.md
index 0ae72ae..c1c7e9b 100644
--- a/tensorflow/examples/learn/README.md
+++ b/tensorflow/examples/learn/README.md
@@ -21,7 +21,7 @@
* [Deep Neural Network with Customized Decay Function](iris_custom_decay_dnn.py)
## Specialized Models
-* [Building a Random Forest Model](random_forest.py)
+* [Building a Random Forest Model](random_forest_mnist.py)
* [Building a Wide & Deep Model](wide_n_deep_tutorial.py)
* [Building a Residual Network Model](resnet.py)
diff --git a/tensorflow/examples/tutorials/mnist/BUILD b/tensorflow/examples/tutorials/mnist/BUILD
index 60fd433..532c868 100644
--- a/tensorflow/examples/tutorials/mnist/BUILD
+++ b/tensorflow/examples/tutorials/mnist/BUILD
@@ -84,7 +84,6 @@
args = [
"--fake_data",
"--max_steps=10",
- "--train_dir=/tmp/mnist",
],
main = "fully_connected_feed.py",
srcs_version = "PY2AND3",
diff --git a/tensorflow/examples/tutorials/mnist/fully_connected_feed.py b/tensorflow/examples/tutorials/mnist/fully_connected_feed.py
index c8262a0..7e4d408 100644
--- a/tensorflow/examples/tutorials/mnist/fully_connected_feed.py
+++ b/tensorflow/examples/tutorials/mnist/fully_connected_feed.py
@@ -117,7 +117,7 @@
"""Train MNIST for a number of steps."""
# Get the sets of images and labels for training, validation, and
# test on MNIST.
- data_sets = input_data.read_data_sets(FLAGS.train_dir, FLAGS.fake_data)
+ data_sets = input_data.read_data_sets(FLAGS.input_data_dir, FLAGS.fake_data)
# Tell TensorFlow that the model will be built into the default Graph.
with tf.Graph().as_default():
@@ -146,13 +146,13 @@
init = tf.initialize_all_variables()
# Create a saver for writing training checkpoints.
- saver = tf.train.Saver()
+ saver = tf.train.Saver(write_version=tf.train.SaverDef.V2)
# Create a session for running Ops on the Graph.
sess = tf.Session()
# Instantiate a SummaryWriter to output summaries and the Graph.
- summary_writer = tf.train.SummaryWriter(FLAGS.train_dir, sess.graph)
+ summary_writer = tf.train.SummaryWriter(FLAGS.log_dir, sess.graph)
# And then after everything is built:
@@ -190,7 +190,7 @@
# Save a checkpoint and evaluate the model periodically.
if (step + 1) % 1000 == 0 or (step + 1) == FLAGS.max_steps:
- checkpoint_file = os.path.join(FLAGS.train_dir, 'checkpoint')
+ checkpoint_file = os.path.join(FLAGS.log_dir, 'model.ckpt')
saver.save(sess, checkpoint_file, global_step=step)
# Evaluate against the training set.
print('Training Data Eval:')
@@ -216,6 +216,9 @@
def main(_):
+ if tf.gfile.Exists(FLAGS.log_dir):
+ tf.gfile.DeleteRecursively(FLAGS.log_dir)
+ tf.gfile.MakeDirs(FLAGS.log_dir)
run_training()
@@ -252,10 +255,16 @@
help='Batch size. Must divide evenly into the dataset sizes.'
)
parser.add_argument(
- '--train_dir',
+ '--input_data_dir',
type=str,
- default='data',
- help='Directory to put the training data.'
+ default='/tmp/tensorflow/mnist/input_data',
+ help='Directory to put the input data.'
+ )
+ parser.add_argument(
+ '--log_dir',
+ type=str,
+ default='/tmp/tensorflow/mnist/logs/fully_connected_feed',
+ help='Directory to put the log data.'
)
parser.add_argument(
'--fake_data',
diff --git a/tensorflow/examples/tutorials/mnist/mnist_softmax.py b/tensorflow/examples/tutorials/mnist/mnist_softmax.py
index 4c6f59e..beb184f 100644
--- a/tensorflow/examples/tutorials/mnist/mnist_softmax.py
+++ b/tensorflow/examples/tutorials/mnist/mnist_softmax.py
@@ -72,7 +72,7 @@
if __name__ == '__main__':
parser = argparse.ArgumentParser()
- parser.add_argument('--data_dir', type=str, default='/tmp/data',
- help='Directory for storing data')
+ parser.add_argument('--data_dir', type=str, default='/tmp/tensorflow/mnist/input_data',
+ help='Directory for storing input data')
FLAGS, unparsed = parser.parse_known_args()
tf.app.run(main=main, argv=[sys.argv[0]] + unparsed)
diff --git a/tensorflow/examples/tutorials/mnist/mnist_with_summaries.py b/tensorflow/examples/tutorials/mnist/mnist_with_summaries.py
index 9fda00a..fc91ac4 100644
--- a/tensorflow/examples/tutorials/mnist/mnist_with_summaries.py
+++ b/tensorflow/examples/tutorials/mnist/mnist_with_summaries.py
@@ -137,9 +137,9 @@
# Merge all the summaries and write them out to /tmp/mnist_logs (by default)
merged = tf.summary.merge_all()
- train_writer = tf.train.SummaryWriter(FLAGS.summaries_dir + '/train',
+ train_writer = tf.train.SummaryWriter(FLAGS.log_dir + '/train',
sess.graph)
- test_writer = tf.train.SummaryWriter(FLAGS.summaries_dir + '/test')
+ test_writer = tf.train.SummaryWriter(FLAGS.log_dir + '/test')
tf.initialize_all_variables().run()
# Train the model, and also write summaries.
@@ -180,9 +180,9 @@
def main(_):
- if tf.gfile.Exists(FLAGS.summaries_dir):
- tf.gfile.DeleteRecursively(FLAGS.summaries_dir)
- tf.gfile.MakeDirs(FLAGS.summaries_dir)
+ if tf.gfile.Exists(FLAGS.log_dir):
+ tf.gfile.DeleteRecursively(FLAGS.log_dir)
+ tf.gfile.MakeDirs(FLAGS.log_dir)
train()
@@ -197,10 +197,9 @@
help='Initial learning rate')
parser.add_argument('--dropout', type=float, default=0.9,
help='Keep probability for training dropout.')
- parser.add_argument('--data_dir', type=str, default='/tmp/data',
- help='Directory for storing data')
- parser.add_argument('--summaries_dir', type=str, default='/tmp/mnist_logs',
- help='Summaries directory')
-
+ parser.add_argument('--data_dir', type=str, default='/tmp/tensorflow/mnist/input_data',
+ help='Directory for storing input data')
+ parser.add_argument('--log_dir', type=str, default='/tmp/tensorflow/mnist/logs/mnist_with_summaries',
+ help='Summaries log directory')
FLAGS, unparsed = parser.parse_known_args()
tf.app.run(main=main, argv=[sys.argv[0]] + unparsed)
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.nn.sampled_softmax_loss.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.nn.sampled_softmax_loss.md
index 6d22f67..44388cc 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.nn.sampled_softmax_loss.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.nn.sampled_softmax_loss.md
@@ -11,8 +11,8 @@
At inference time, you can compute full softmax probabilities with the
expression `tf.nn.softmax(tf.matmul(inputs, tf.transpose(weights)) + biases)`.
-See our [Candidate Sampling Algorithms Reference]
-(../../extras/candidate_sampling.pdf)
+See our
+[Candidate Sampling Algorithms Reference](../../extras/candidate_sampling.pdf)
Also see Section 3 of [Jean et al., 2014](http://arxiv.org/abs/1412.2007)
([pdf](http://arxiv.org/pdf/1412.2007.pdf)) for the math.
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.nn.depthwise_conv2d_native.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.nn.depthwise_conv2d_native.md
index c2736f1..2e04ee2 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.nn.depthwise_conv2d_native.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.nn.depthwise_conv2d_native.md
@@ -17,7 +17,7 @@
filter[di, dj, k, q]
Must have `strides[0] = strides[3] = 1`. For the most common case of the same
-horizontal and vertices strides, `strides = [1, stride, stride, 1]`.
+horizontal and vertical strides, `strides = [1, stride, stride, 1]`.
##### Args:
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.nn.nce_loss.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.nn.nce_loss.md
index b0fa637..aa2d46f 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.nn.nce_loss.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.nn.nce_loss.md
@@ -42,8 +42,7 @@
where a sampled class equals one of the target classes. If set to
`True`, this is a "Sampled Logistic" loss instead of NCE, and we are
learning to generate log-odds instead of log probabilities. See
- our [Candidate Sampling Algorithms Reference]
- (../../extras/candidate_sampling.pdf).
+ our [Candidate Sampling Algorithms Reference](../../extras/candidate_sampling.pdf).
Default is False.
* <b>`partition_strategy`</b>: A string specifying the partitioning strategy, relevant
if `len(weights) > 1`. Currently `"div"` and `"mod"` are supported.
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard7/tf.nn.local_response_normalization.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard7/tf.nn.local_response_normalization.md
index 81134df..2738a61 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard7/tf.nn.local_response_normalization.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard7/tf.nn.local_response_normalization.md
@@ -11,8 +11,8 @@
sum(input[a, b, c, d - depth_radius : d + depth_radius + 1] ** 2)
output = input / (bias + alpha * sqr_sum) ** beta
-For details, see [Krizhevsky et al., ImageNet classification with deep
-convolutional neural networks (NIPS 2012)](http://papers.nips.cc/paper/4824-imagenet-classification-with-deep-convolutional-neural-networks).
+For details, see
+[Krizhevsky et al., ImageNet classification with deep convolutional neural networks (NIPS 2012)](http://papers.nips.cc/paper/4824-imagenet-classification-with-deep-convolutional-neural-networks).
##### Args:
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard8/tf.nn.conv2d.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard8/tf.nn.conv2d.md
index d40ed35..3f51a3b 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard8/tf.nn.conv2d.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard8/tf.nn.conv2d.md
@@ -22,7 +22,7 @@
filter[di, dj, q, k]
Must have `strides[0] = strides[3] = 1`. For the most common case of the same
-horizontal and vertices strides, `strides = [1, stride, stride, 1]`.
+horizontal and vertical strides, `strides = [1, stride, stride, 1]`.
##### Args:
diff --git a/tensorflow/g3doc/get_started/os_setup.md b/tensorflow/g3doc/get_started/os_setup.md
index 139e225..8d16894 100644
--- a/tensorflow/g3doc/get_started/os_setup.md
+++ b/tensorflow/g3doc/get_started/os_setup.md
@@ -63,37 +63,37 @@
```bash
# Ubuntu/Linux 64-bit, CPU only, Python 2.7
-$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0rc0-cp27-none-linux_x86_64.whl
+$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0rc1-cp27-none-linux_x86_64.whl
# Ubuntu/Linux 64-bit, GPU enabled, Python 2.7
-# Requires CUDA toolkit 7.5 and CuDNN v5. For other versions, see "Install from sources" below.
-$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.11.0rc0-cp27-none-linux_x86_64.whl
+# Requires CUDA toolkit 7.5 and CuDNN v5. For other versions, see "Installing from sources" below.
+$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.11.0rc1-cp27-none-linux_x86_64.whl
# Mac OS X, CPU only, Python 2.7:
-$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-0.11.0rc0-py2-none-any.whl
+$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-0.11.0rc1-py2-none-any.whl
# Mac OS X, GPU enabled, Python 2.7:
-$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/gpu/tensorflow-0.11.0rc0-py2-none-any.whl
+$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/gpu/tensorflow-0.11.0rc1-py2-none-any.whl
# Ubuntu/Linux 64-bit, CPU only, Python 3.4
-$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0rc0-cp34-cp34m-linux_x86_64.whl
+$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0rc1-cp34-cp34m-linux_x86_64.whl
# Ubuntu/Linux 64-bit, GPU enabled, Python 3.4
-# Requires CUDA toolkit 7.5 and CuDNN v5. For other versions, see "Install from sources" below.
-$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.11.0rc0-cp34-cp34m-linux_x86_64.whl
+# Requires CUDA toolkit 7.5 and CuDNN v5. For other versions, see "Installing from sources" below.
+$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.11.0rc1-cp34-cp34m-linux_x86_64.whl
# Ubuntu/Linux 64-bit, CPU only, Python 3.5
-$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0rc0-cp35-cp35m-linux_x86_64.whl
+$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0rc1-cp35-cp35m-linux_x86_64.whl
# Ubuntu/Linux 64-bit, GPU enabled, Python 3.5
-# Requires CUDA toolkit 7.5 and CuDNN v5. For other versions, see "Install from sources" below.
-$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.11.0rc0-cp35-cp35m-linux_x86_64.whl
+# Requires CUDA toolkit 7.5 and CuDNN v5. For other versions, see "Installing from sources" below.
+$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.11.0rc1-cp35-cp35m-linux_x86_64.whl
# Mac OS X, CPU only, Python 3.4 or 3.5:
-$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-0.11.0rc0-py3-none-any.whl
+$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-0.11.0rc1-py3-none-any.whl
# Mac OS X, GPU enabled, Python 3.4 or 3.5:
-$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/gpu/tensorflow-0.11.0rc0-py3-none-any.whl
+$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/gpu/tensorflow-0.11.0rc1-py3-none-any.whl
```
Install TensorFlow:
@@ -159,37 +159,37 @@
```bash
# Ubuntu/Linux 64-bit, CPU only, Python 2.7
-(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0rc0-cp27-none-linux_x86_64.whl
+(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0rc1-cp27-none-linux_x86_64.whl
# Ubuntu/Linux 64-bit, GPU enabled, Python 2.7
-# Requires CUDA toolkit 7.5 and CuDNN v5. For other versions, see "Install from sources" below.
-(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.11.0rc0-cp27-none-linux_x86_64.whl
+# Requires CUDA toolkit 7.5 and CuDNN v5. For other versions, see "Installing from sources" below.
+(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.11.0rc1-cp27-none-linux_x86_64.whl
# Mac OS X, CPU only, Python 2.7:
-(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-0.11.0rc0-py2-none-any.whl
+(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-0.11.0rc1-py2-none-any.whl
# Mac OS X, GPU enabled, Python 2.7:
-(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/gpu/tensorflow-0.11.0rc0-py2-none-any.whl
+(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/gpu/tensorflow-0.11.0rc1-py2-none-any.whl
# Ubuntu/Linux 64-bit, CPU only, Python 3.4
-(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0rc0-cp34-cp34m-linux_x86_64.whl
+(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0rc1-cp34-cp34m-linux_x86_64.whl
# Ubuntu/Linux 64-bit, GPU enabled, Python 3.4
-# Requires CUDA toolkit 7.5 and CuDNN v5. For other versions, see "Install from sources" below.
-(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.11.0rc0-cp34-cp34m-linux_x86_64.whl
+# Requires CUDA toolkit 7.5 and CuDNN v5. For other versions, see "Installing from sources" below.
+(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.11.0rc1-cp34-cp34m-linux_x86_64.whl
# Ubuntu/Linux 64-bit, CPU only, Python 3.5
-(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0rc0-cp35-cp35m-linux_x86_64.whl
+(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0rc1-cp35-cp35m-linux_x86_64.whl
# Ubuntu/Linux 64-bit, GPU enabled, Python 3.5
-# Requires CUDA toolkit 7.5 and CuDNN v5. For other versions, see "Install from sources" below.
-(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.11.0rc0-cp35-cp35m-linux_x86_64.whl
+# Requires CUDA toolkit 7.5 and CuDNN v5. For other versions, see "Installing from sources" below.
+(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.11.0rc1-cp35-cp35m-linux_x86_64.whl
# Mac OS X, CPU only, Python 3.4 or 3.5:
-(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-0.11.0rc0-py3-none-any.whl
+(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-0.11.0rc1-py3-none-any.whl
# Mac OS X, GPU enabled, Python 3.4 or 3.5:
-(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/gpu/tensorflow-0.11.0rc0-py3-none-any.whl
+(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/gpu/tensorflow-0.11.0rc1-py3-none-any.whl
```
Finally install TensorFlow:
@@ -298,37 +298,37 @@
```bash
# Ubuntu/Linux 64-bit, CPU only, Python 2.7
-(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0rc0-cp27-none-linux_x86_64.whl
+(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0rc1-cp27-none-linux_x86_64.whl
# Ubuntu/Linux 64-bit, GPU enabled, Python 2.7
-# Requires CUDA toolkit 7.5 and CuDNN v5. For other versions, see "Install from sources" below.
-(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.11.0rc0-cp27-none-linux_x86_64.whl
+# Requires CUDA toolkit 7.5 and CuDNN v5. For other versions, see "Installing from sources" below.
+(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.11.0rc1-cp27-none-linux_x86_64.whl
# Mac OS X, CPU only, Python 2.7:
-(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-0.11.0rc0-py2-none-any.whl
+(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-0.11.0rc1-py2-none-any.whl
# Mac OS X, GPU enabled, Python 2.7:
-(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/gpu/tensorflow-0.11.0rc0-py2-none-any.whl
+(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/gpu/tensorflow-0.11.0rc1-py2-none-any.whl
# Ubuntu/Linux 64-bit, CPU only, Python 3.4
-(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0rc0-cp34-cp34m-linux_x86_64.whl
+(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0rc1-cp34-cp34m-linux_x86_64.whl
# Ubuntu/Linux 64-bit, GPU enabled, Python 3.4
-# Requires CUDA toolkit 7.5 and CuDNN v5. For other versions, see "Install from sources" below.
-(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.11.0rc0-cp34-cp34m-linux_x86_64.whl
+# Requires CUDA toolkit 7.5 and CuDNN v5. For other versions, see "Installing from sources" below.
+(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.11.0rc1-cp34-cp34m-linux_x86_64.whl
# Ubuntu/Linux 64-bit, CPU only, Python 3.5
-(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0rc0-cp35-cp35m-linux_x86_64.whl
+(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0rc1-cp35-cp35m-linux_x86_64.whl
# Ubuntu/Linux 64-bit, GPU enabled, Python 3.5
-# Requires CUDA toolkit 7.5 and CuDNN v5. For other versions, see "Install from sources" below.
-(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.11.0rc0-cp35-cp35m-linux_x86_64.whl
+# Requires CUDA toolkit 7.5 and CuDNN v5. For other versions, see "Installing from sources" below.
+(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.11.0rc1-cp35-cp35m-linux_x86_64.whl
# Mac OS X, CPU only, Python 3.4 or 3.5:
-(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-0.11.0rc0-py3-none-any.whl
+(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-0.11.0rc1-py3-none-any.whl
# Mac OS X, GPU enabled, Python 3.4 or 3.5:
-(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/gpu/tensorflow-0.11.0rc0-py3-none-any.whl
+(tensorflow)$ export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/mac/gpu/tensorflow-0.11.0rc1-py3-none-any.whl
```
Finally install TensorFlow:
@@ -396,13 +396,13 @@
code.
We also have tags with `latest` replaced by a released version (e.g.,
-`0.11.0-gpu`).
+`0.11.0rc1-gpu`).
With Docker the installation is as follows:
* Install Docker on your machine.
* Create a [Docker
-group](http://docs.docker.com/engine/installation/ubuntulinux/#create-a-docker-group)
+group](https://docs.docker.com/engine/installation/linux/ubuntulinux/#/create-a-docker-group)
to allow launching containers without `sudo`.
* Launch a Docker container with the TensorFlow image. The image
gets downloaded automatically on first launch.
@@ -780,7 +780,7 @@
$ bazel-bin/tensorflow/tools/pip_package/build_pip_package /tmp/tensorflow_pkg
# The name of the .whl file will depend on your platform.
-$ sudo pip install /tmp/tensorflow_pkg/tensorflow-0.11.0rc0-py2-none-any.whl
+$ sudo pip install /tmp/tensorflow_pkg/tensorflow-0.11.0rc1-py2-none-any.whl
```
## Setting up TensorFlow for Development
diff --git a/tensorflow/g3doc/tutorials/wide/index.md b/tensorflow/g3doc/tutorials/wide/index.md
index 1bad7ea..643599f 100644
--- a/tensorflow/g3doc/tutorials/wide/index.md
+++ b/tensorflow/g3doc/tutorials/wide/index.md
@@ -222,12 +222,12 @@
feature values of a column and there are only a few of them, you can use
`sparse_column_with_keys`. Each key in the list will get assigned an
auto-incremental ID starting from 0. For example, for the `gender` column we can
-assign the feature string "female" to an integer ID of 0 and "male" to 1 by
+assign the feature string "Female" to an integer ID of 0 and "Male" to 1 by
doing:
```python
gender = tf.contrib.layers.sparse_column_with_keys(
- column_name="gender", keys=["female", "male"])
+ column_name="gender", keys=["Female", "Male"])
```
What if we don't know the set of possible values in advance? Not a problem. We
diff --git a/tensorflow/g3doc/tutorials/wide_and_deep/index.md b/tensorflow/g3doc/tutorials/wide_and_deep/index.md
index da7b2f7..760e4ba 100644
--- a/tensorflow/g3doc/tutorials/wide_and_deep/index.md
+++ b/tensorflow/g3doc/tutorials/wide_and_deep/index.md
@@ -16,7 +16,8 @@
you're interested in learning more about how Wide & Deep Learning works, please
check out our [research paper](http://arxiv.org/abs/1606.07792).
-
+![Wide & Deep Spectrum of Models](../../images/wide_n_deep.svg
+"Wide & Deep")
The figure above shows a comparison of a wide model (logistic regression with
sparse features and transformations), a deep model (feed-forward neural network
@@ -85,7 +86,9 @@
import tensorflow as tf
# Categorical base columns.
-gender = tf.contrib.layers.sparse_column_with_keys(column_name="gender", keys=["female", "male"])
+gender = tf.contrib.layers.sparse_column_with_keys(column_name="gender", keys=["Female", "Male"])
+race = tf.contrib.layers.sparse_column_with_keys(column_name="race", keys=[
+ "Amer-Indian-Eskimo", "Asian-Pac-Islander", "Black", "Other", "White"])
education = tf.contrib.layers.sparse_column_with_hash_bucket("education", hash_bucket_size=1000)
relationship = tf.contrib.layers.sparse_column_with_hash_bucket("relationship", hash_bucket_size=100)
workclass = tf.contrib.layers.sparse_column_with_hash_bucket("workclass", hash_bucket_size=100)
diff --git a/tensorflow/models/image/cifar10/cifar10.py b/tensorflow/models/image/cifar10/cifar10.py
index 4908964..fb3a42c 100644
--- a/tensorflow/models/image/cifar10/cifar10.py
+++ b/tensorflow/models/image/cifar10/cifar10.py
@@ -391,4 +391,5 @@
print()
statinfo = os.stat(filepath)
print('Successfully downloaded', filename, statinfo.st_size, 'bytes.')
- tarfile.open(filepath, 'r:gz').extractall(dest_directory)
+
+ tarfile.open(filepath, 'r:gz').extractall(dest_directory)
diff --git a/tensorflow/models/rnn/ptb/ptb_word_lm.py b/tensorflow/models/rnn/ptb/ptb_word_lm.py
index 39b5cb4..f4f2888 100644
--- a/tensorflow/models/rnn/ptb/ptb_word_lm.py
+++ b/tensorflow/models/rnn/ptb/ptb_word_lm.py
@@ -339,7 +339,7 @@
tf.scalar_summary("Validation Loss", mvalid.cost)
with tf.name_scope("Test"):
- test_input = PTBInput(config=config, data=test_data, name="TestInput")
+ test_input = PTBInput(config=eval_config, data=test_data, name="TestInput")
with tf.variable_scope("Model", reuse=True, initializer=initializer):
mtest = PTBModel(is_training=False, config=eval_config,
input_=test_input)
@@ -347,7 +347,7 @@
sv = tf.train.Supervisor(logdir=FLAGS.save_path)
with sv.managed_session() as session:
for i in range(config.max_max_epoch):
- lr_decay = config.lr_decay ** max(i - config.max_epoch, 0.0)
+ lr_decay = config.lr_decay ** max(i + 1 - config.max_epoch, 0.0)
m.assign_lr(session, config.learning_rate * lr_decay)
print("Epoch: %d Learning rate: %.3f" % (i + 1, session.run(m.lr)))
diff --git a/tensorflow/python/kernel_tests/BUILD b/tensorflow/python/kernel_tests/BUILD
index 7e3206e..baa48ec 100644
--- a/tensorflow/python/kernel_tests/BUILD
+++ b/tensorflow/python/kernel_tests/BUILD
@@ -213,7 +213,7 @@
additional_deps = ["//tensorflow:tensorflow_py"],
)
-tf_py_test(
+cuda_py_test(
name = "matrix_triangular_solve_op_test",
size = "small",
srcs = ["matrix_triangular_solve_op_test.py"],
diff --git a/tensorflow/python/kernel_tests/conv2d_transpose_test.py b/tensorflow/python/kernel_tests/conv2d_transpose_test.py
index a3fddcb..77f783d 100644
--- a/tensorflow/python/kernel_tests/conv2d_transpose_test.py
+++ b/tensorflow/python/kernel_tests/conv2d_transpose_test.py
@@ -21,6 +21,7 @@
import numpy as np
from six.moves import xrange # pylint: disable=redefined-builtin
import tensorflow as tf
+from tensorflow.python.client import device_lib
class Conv2DTransposeTest(tf.test.TestCase):
@@ -157,6 +158,119 @@
err_tolerance = 0.0005
self.assertLess(err, err_tolerance)
+ def testConv2DTransposeSingleStrideNCHW(self):
+ # `NCHW` data format is only supported for `GPU` device.
+ if tf.test.is_gpu_available():
+ with self.test_session(use_gpu=True):
+ strides = [1, 1, 1, 1]
+
+ # Input, output: [batch, depth, height, width]
+ x_shape = [2, 3, 6, 4]
+ y_shape = [2, 2, 6, 4]
+
+ # Filter: [kernel_height, kernel_width, output_depth, input_depth]
+ f_shape = [3, 3, 2, 3]
+
+ x = tf.constant(1.0, shape=x_shape, name="x", dtype=tf.float32)
+ f = tf.constant(1.0, shape=f_shape, name="filter", dtype=tf.float32)
+
+ output = tf.nn.conv2d_transpose(x, f, y_shape, strides=strides,
+ padding="SAME", data_format='NCHW')
+
+ value = output.eval()
+ for n in xrange(x_shape[0]):
+ for k in xrange(f_shape[2]):
+ for w in xrange(y_shape[3]):
+ for h in xrange(y_shape[2]):
+ target = 4 * 3.0
+ h_in = h > 0 and h < y_shape[2] - 1
+ w_in = w > 0 and w < y_shape[3] - 1
+ if h_in and w_in:
+ target += 5 * 3.0
+ elif h_in or w_in:
+ target += 2 * 3.0
+ self.assertAllClose(target, value[n, k, h, w])
+
+ def testConv2DTransposeSameNCHW(self):
+ # `NCHW` data format is only supported for `GPU` device.
+ if tf.test.is_gpu_available():
+ with self.test_session(use_gpu=True):
+ strides = [1, 1, 2, 2]
+
+ # Input, output: [batch, depth, height, width]
+ x_shape = [2, 3, 6, 4]
+ y_shape = [2, 2, 12, 8]
+
+ # Filter: [kernel_height, kernel_width, output_depth, input_depth]
+ f_shape = [3, 3, 2, 3]
+
+ x = tf.constant(1.0, shape=x_shape, name="x", dtype=tf.float32)
+ f = tf.constant(1.0, shape=f_shape, name="filter", dtype=tf.float32)
+
+ output = tf.nn.conv2d_transpose(x, f, y_shape, strides=strides,
+ padding="SAME", data_format='NCHW')
+
+ value = output.eval()
+ for n in xrange(x_shape[0]):
+ for k in xrange(f_shape[2]):
+ for w in xrange(y_shape[3]):
+ for h in xrange(y_shape[2]):
+ target = 3.0
+ # We add a case for locations divisible by the stride.
+ h_in = h % strides[2] == 0 and h > 0 and h < y_shape[2] - 1
+ w_in = w % strides[3] == 0 and w > 0 and w < y_shape[3] - 1
+ if h_in and w_in:
+ target += 9.0
+ elif h_in or w_in:
+ target += 3.0
+ self.assertAllClose(target, value[n, k, h, w])
+
+ def testConv2DTransposeValidNCHW(self):
+ # `NCHW` data format is only supported for `GPU` device.
+ if tf.test.is_gpu_available():
+ with self.test_session(use_gpu=True):
+ strides = [1, 1, 2, 2]
+
+ # Input, output: [batch, depth, height, width]
+ x_shape = [2, 3, 6, 4]
+ y_shape = [2, 2, 13, 9]
+
+ # Filter: [kernel_height, kernel_width, output_depth, input_depth]
+ f_shape = [3, 3, 2, 3]
+
+ x = tf.constant(1.0, shape=x_shape, name="x", dtype=tf.float32)
+ f = tf.constant(1.0, shape=f_shape, name="filter", dtype=tf.float32)
+ output = tf.nn.conv2d_transpose(x, f, y_shape, strides=strides,
+ padding="VALID", data_format='NCHW')
+
+ value = output.eval()
+ cache_values = np.zeros(y_shape, dtype=np.float32)
+ # The amount of padding added
+ pad = 1
+ for n in xrange(x_shape[0]):
+ for k in xrange(f_shape[2]):
+ for w in xrange(pad, y_shape[3] - pad):
+ for h in xrange(pad, y_shape[2] - pad):
+ target = 3.0
+ # We add a case for locations divisible by the stride.
+ h_in = h % strides[
+ 2] == 0 and h > pad and h < y_shape[2] - 1 - pad
+ w_in = w % strides[
+ 3] == 0 and w > pad and w < y_shape[3] - 1 - pad
+ if h_in and w_in:
+ target += 9.0
+ elif h_in or w_in:
+ target += 3.0
+ cache_values[n, k, h, w] = target
+
+ # copy values in the border
+ cache_values[n, k, :, 0] = cache_values[n, k, :, 1]
+ cache_values[n, k, :, -1] = cache_values[n, k, :, -2]
+ cache_values[n, k, 0, :] = cache_values[n, k, 1, :]
+ cache_values[n, k, -1, :] = cache_values[n, k, -2, :]
+
+ self.assertAllClose(cache_values, value)
+
if __name__ == "__main__":
tf.test.main()
diff --git a/tensorflow/python/kernel_tests/cwise_ops_test.py b/tensorflow/python/kernel_tests/cwise_ops_test.py
index cef8bfd..60eb7c2 100644
--- a/tensorflow/python/kernel_tests/cwise_ops_test.py
+++ b/tensorflow/python/kernel_tests/cwise_ops_test.py
@@ -1356,6 +1356,18 @@
elif x.dtype == np.float64:
self.assertAllClose(jacob_t, jacob_n, rtol=1e-5, atol=1e-5)
+ def testScalar(self):
+ c = True
+ x = np.random.rand(1, 3, 2) * 100
+ y = np.random.rand(1, 3, 2) * 100
+ for t in [np.float16, np.float32, np.float64, np.int32, np.int64,
+ np.complex64, np.complex128]:
+ xt = x.astype(t)
+ yt = y.astype(t)
+ self._compare(c, xt, yt, use_gpu=False)
+ if t in [np.float16, np.float32, np.float64]:
+ self._compare(c, xt, yt, use_gpu=True)
+
def testBasic(self):
c = np.random.randint(0, 2, 6).astype(np.bool).reshape(1, 3, 2)
x = np.random.rand(1, 3, 2) * 100
diff --git a/tensorflow/python/kernel_tests/matrix_triangular_solve_op_test.py b/tensorflow/python/kernel_tests/matrix_triangular_solve_op_test.py
index 411f51a..c415482 100644
--- a/tensorflow/python/kernel_tests/matrix_triangular_solve_op_test.py
+++ b/tensorflow/python/kernel_tests/matrix_triangular_solve_op_test.py
@@ -24,15 +24,17 @@
class MatrixTriangularSolveOpTest(tf.test.TestCase):
def _verifySolveAllWays(self, x, y, batch_dims=None):
- for lower in True, False:
- for adjoint in True, False:
- self._verifySolve(x,
- y,
- lower=lower,
- adjoint=adjoint,
- batch_dims=batch_dims)
+ for use_gpu in True, False:
+ for lower in True, False:
+ for adjoint in True, False:
+ self._verifySolve(x,
+ y,
+ lower=lower,
+ adjoint=adjoint,
+ batch_dims=batch_dims,
+ use_gpu=use_gpu)
- def _verifySolve(self, x, y, lower=True, adjoint=False, batch_dims=None):
+ def _verifySolve(self, x, y, lower=True, adjoint=False, batch_dims=None, use_gpu=False):
for np_type in [np.float32, np.float64]:
a = x.astype(np_type)
b = y.astype(np_type)
@@ -52,7 +54,7 @@
a_np = np.tile(a_np, batch_dims + [1, 1])
b = np.tile(b, batch_dims + [1, 1])
- with self.test_session():
+ with self.test_session(use_gpu=use_gpu):
tf_ans = tf.matrix_triangular_solve(a, b, lower=lower, adjoint=adjoint)
out = tf_ans.eval()
np_ans = np.linalg.solve(a_np, b)
diff --git a/tensorflow/python/kernel_tests/relu_op_test.py b/tensorflow/python/kernel_tests/relu_op_test.py
index b644f2a..4755edc 100644
--- a/tensorflow/python/kernel_tests/relu_op_test.py
+++ b/tensorflow/python/kernel_tests/relu_op_test.py
@@ -264,6 +264,42 @@
print("elu (float64) gradient err = ", err)
self.assertLess(err, 1e-6)
+ def testGradGradFloat32(self):
+ with self.test_session():
+ x = tf.constant(
+ [-0.9, -0.7, -0.5, -0.3, -0.1, 0.1, 0.3, 0.5, 0.7, 0.9],
+ shape=[2, 5], name="x")
+ y = tf.nn.elu(x, name="elu")
+ z = tf.gradients(y, x)
+ x_init = np.asarray(
+ [[-0.9, -0.7, -0.5, -0.3, -0.1], [0.1, 0.3, 0.5, 0.7, 0.9]],
+ dtype=np.float32, order="F")
+ err = tf.test.compute_gradient_error(x,
+ [2, 5],
+ z[0],
+ [2, 5],
+ x_init_value=x_init)
+ print("elu (float32) gradient of gradient err = ", err)
+ self.assertLess(err, 1e-4)
+ def testGradGradFloat64(self):
+ with self.test_session():
+ x = tf.constant(
+ [-0.9, -0.7, -0.5, -0.3, -0.1, 0.1, 0.3, 0.5, 0.7, 0.9],
+ shape=[2, 5], dtype=tf.float64, name="x")
+ y = tf.nn.elu(x, name="elu")
+ z = tf.gradients(y, x)
+ x_init = np.asarray(
+ [[-0.9, -0.7, -0.5, -0.3, -0.1], [0.1, 0.3, 0.5, 0.7, 0.9]],
+ dtype=np.float64, order="F")
+ err = tf.test.compute_gradient_error(x,
+ [2, 5],
+ z[0],
+ [2, 5],
+ x_init_value=x_init)
+ print("elu (float64) gradient of gradient err = ", err)
+ self.assertLess(err, 1e-6)
+
+
if __name__ == "__main__":
tf.test.main()
diff --git a/tensorflow/python/ops/math_ops.py b/tensorflow/python/ops/math_ops.py
index f5655f8..1a34634 100644
--- a/tensorflow/python/ops/math_ops.py
+++ b/tensorflow/python/ops/math_ops.py
@@ -1795,7 +1795,7 @@
performed
instead:
```prettyprint
- tf.cumprod([a, b, c], exclusive=True) ==> [0, a, a * b]
+ tf.cumprod([a, b, c], exclusive=True) ==> [1, a, a * b]
```
By setting the `reverse` kwarg to `True`, the cumprod is performed in the
@@ -1807,7 +1807,7 @@
The `reverse` and `exclusive` kwargs can also be combined:
```prettyprint
- tf.cumprod([a, b, c], exclusive=True, reverse=True) ==> [b * c, c, 0]
+ tf.cumprod([a, b, c], exclusive=True, reverse=True) ==> [b * c, c, 1]
```
Args:
diff --git a/tensorflow/python/ops/nn_grad.py b/tensorflow/python/ops/nn_grad.py
index 6a35cfb..149bde4 100644
--- a/tensorflow/python/ops/nn_grad.py
+++ b/tensorflow/python/ops/nn_grad.py
@@ -25,7 +25,7 @@
from tensorflow.python.ops import nn_ops
from tensorflow.python.ops import sparse_ops
from tensorflow.python.ops import gen_nn_ops
-
+from tensorflow.python.ops import gen_math_ops
@ops.RegisterGradient("Conv2DBackpropInput")
def _Conv2DBackpropInputGrad(op, grad):
@@ -268,6 +268,14 @@
return gen_nn_ops._relu_grad(grad, op.outputs[0])
+@ops.RegisterGradient("EluGrad")
+def _EluGradGrad(op, grad):
+ x = op.inputs[1]
+ return (gen_nn_ops._elu_grad(grad, op.outputs[0]),
+ gen_math_ops.select(x < 0., gen_nn_ops._elu_grad(grad, op.outputs[0] + 1),
+ array_ops.zeros(shape = array_ops.shape(x), dtype = x.dtype)))
+
+
@ops.RegisterGradient("Relu6")
def _Relu6Grad(op, grad):
return gen_nn_ops._relu6_grad(grad, op.inputs[0])
diff --git a/tensorflow/python/ops/nn_ops.py b/tensorflow/python/ops/nn_ops.py
index 421e767..5b08dcd 100644
--- a/tensorflow/python/ops/nn_ops.py
+++ b/tensorflow/python/ops/nn_ops.py
@@ -1010,6 +1010,7 @@
output_shape,
strides,
padding="SAME",
+ data_format="NHWC",
name=None):
"""The transpose of `conv2d`.
@@ -1020,7 +1021,8 @@
Args:
value: A 4-D `Tensor` of type `float` and shape
- `[batch, height, width, in_channels]`.
+ `[batch, height, width, in_channels]` for `NHWC` data format or
+ `[batch, in_channels, height, width]` for `NCHW` data format.
filter: A 4-D `Tensor` with the same type as `value` and shape
`[height, width, output_channels, in_channels]`. `filter`'s
`in_channels` dimension must match that of `value`.
@@ -1030,6 +1032,7 @@
dimension of the input tensor.
padding: A string, either `'VALID'` or `'SAME'`. The padding algorithm.
See the [comment here](https://www.tensorflow.org/api_docs/python/nn.html#convolution)
+ data_format: A string. 'NHWC' and 'NCHW' are supported.
name: Optional name for the returned tensor.
Returns:
@@ -1041,9 +1044,12 @@
"""
with ops.name_scope(name, "conv2d_transpose",
[value, filter, output_shape]) as name:
+ if data_format not in ("NCHW", "NHWC"):
+ raise ValueError("data_format has to be either NCHW or NHWC.")
value = ops.convert_to_tensor(value, name="value")
filter = ops.convert_to_tensor(filter, name="filter")
- if not value.get_shape()[3].is_compatible_with(filter.get_shape()[3]):
+ axis = 3 if data_format=="NHWC" else 1
+ if not value.get_shape()[axis].is_compatible_with(filter.get_shape()[3]):
raise ValueError("input channels does not match filter's input channels, "
"{} != {}".format(value.get_shape()[3], filter.get_shape(
)[3]))
@@ -1055,10 +1061,10 @@
if isinstance(output_shape, (list, np.ndarray)):
# output_shape's shape should be == [4] if reached this point.
- if not filter.get_shape()[2].is_compatible_with(output_shape[3]):
+ if not filter.get_shape()[2].is_compatible_with(output_shape[axis]):
raise ValueError(
"output_shape does not match filter's output channels, "
- "{} != {}".format(output_shape[3], filter.get_shape()[2]))
+ "{} != {}".format(output_shape[axis], filter.get_shape()[2]))
if padding != "VALID" and padding != "SAME":
raise ValueError("padding must be either VALID or SAME:"
@@ -1069,6 +1075,7 @@
out_backprop=value,
strides=strides,
padding=padding,
+ data_format=data_format,
name=name)
diff --git a/tensorflow/python/training/learning_rate_decay.py b/tensorflow/python/training/learning_rate_decay.py
index a86586d..1d9dc3f 100644
--- a/tensorflow/python/training/learning_rate_decay.py
+++ b/tensorflow/python/training/learning_rate_decay.py
@@ -68,7 +68,7 @@
Must be positive. See the decay computation above.
decay_rate: A scalar `float32` or `float64` `Tensor` or a
Python number. The decay rate.
- staircase: Boolean. It `True` decay the learning rate at discrete intervals
+ staircase: Boolean. If `True` decay the learning rate at discrete intervals
name: String. Optional name of the operation. Defaults to
'ExponentialDecay'.
diff --git a/tensorflow/stream_executor/cuda/cuda_diagnostics.cc b/tensorflow/stream_executor/cuda/cuda_diagnostics.cc
index 54e00d5..def2805 100644
--- a/tensorflow/stream_executor/cuda/cuda_diagnostics.cc
+++ b/tensorflow/stream_executor/cuda/cuda_diagnostics.cc
@@ -15,7 +15,10 @@
#include "tensorflow/stream_executor/cuda/cuda_diagnostics.h"
+#if !defined(PLATFORM_WINDOWS)
#include <dirent.h>
+#endif
+
#include <limits.h>
#include <stddef.h>
#include <stdio.h>
@@ -25,11 +28,13 @@
#include <IOKit/kext/KextManager.h>
#include <mach-o/dyld.h>
#else
+#if !defined(PLATFORM_WINDOWS)
#include <link.h>
-#include <sys/stat.h>
#include <sys/sysmacros.h>
-#endif
#include <unistd.h>
+#endif
+#include <sys/stat.h>
+#endif
#include <algorithm>
#include <memory>
#include <vector>
@@ -135,7 +140,7 @@
<< "(" << port::Hostname() << ")";
}
CFRelease(kext_infos);
-#else
+#elif !defined(PLATFORM_WINDOWS)
if (access(kDriverVersionPath, F_OK) != 0) {
LOG(INFO) << "kernel driver does not appear to be running on this host "
<< "(" << port::Hostname() << "): "
@@ -158,7 +163,7 @@
/* static */ void Diagnostician::LogDriverVersionInformation() {
LOG(INFO) << "hostname: " << port::Hostname();
-
+#ifndef PLATFORM_WINDOWS
if (VLOG_IS_ON(1)) {
const char *value = getenv("LD_LIBRARY_PATH");
string library_path = value == nullptr ? "" : value;
@@ -180,17 +185,17 @@
closedir(dir);
}
}
-
port::StatusOr<DriverVersion> dso_version = FindDsoVersion();
LOG(INFO) << "libcuda reported version is: "
<< DriverVersionStatusToString(dso_version);
port::StatusOr<DriverVersion> kernel_version = FindKernelDriverVersion();
LOG(INFO) << "kernel reported version is: "
- << DriverVersionStatusToString(kernel_version);
+ << DriverVersionStatusToString(kernel_version);
+#endif
// OS X kernel driver does not report version accurately
-#if !defined(__APPLE__)
+#if !defined(__APPLE__) && !defined(PLATFORM_WINDOWS)
if (kernel_version.ok() && dso_version.ok()) {
WarnOnDsoKernelMismatch(dso_version, kernel_version);
}
@@ -227,6 +232,7 @@
result = StringToDriverVersion(version);
}
#else
+#if !defined(PLATFORM_WINDOWS)
// Callback used when iterating through DSOs. Looks for the driver-interfacing
// DSO and yields its version number into the callback data, when found.
auto iterate_phdr =
@@ -259,6 +265,7 @@
dl_iterate_phdr(iterate_phdr, &result);
#endif
+#endif
return result;
}
diff --git a/tensorflow/stream_executor/cuda/cuda_dnn.cc b/tensorflow/stream_executor/cuda/cuda_dnn.cc
index 1c13379..64e0641 100644
--- a/tensorflow/stream_executor/cuda/cuda_dnn.cc
+++ b/tensorflow/stream_executor/cuda/cuda_dnn.cc
@@ -3200,6 +3200,7 @@
Stream* stream, const dnn::NormalizeDescriptor& normalize_descriptor,
const DeviceMemory<float>& input_data, DeviceMemory<float>* output_data) {
LOG(FATAL) << "not yet implemented"; // TODO(leary)
+ return false;
}
bool CudnnSupport::DoNormalizeWithDimensions(
diff --git a/tensorflow/stream_executor/cuda/cuda_driver.cc b/tensorflow/stream_executor/cuda/cuda_driver.cc
index 095c82a..6a4c689 100644
--- a/tensorflow/stream_executor/cuda/cuda_driver.cc
+++ b/tensorflow/stream_executor/cuda/cuda_driver.cc
@@ -19,8 +19,8 @@
#include <stdint.h>
#include <stdlib.h>
#include <set>
-#include "tensorflow/stream_executor/platform/port.h"
+#include "tensorflow/stream_executor/platform/port.h"
#include "tensorflow/stream_executor/cuda/cuda_diagnostics.h"
#include "tensorflow/stream_executor/dso_loader.h"
#include "tensorflow/stream_executor/lib/casts.h"
@@ -38,6 +38,14 @@
#include "tensorflow/stream_executor/platform/port.h"
#include "tensorflow/stream_executor/lib/inlined_vector.h"
+#if defined(PLATFORM_WINDOWS)
+// TODO: in windows ARRAYSIZE is defined in winnt.h but including it
+// here creates a conflict with cuda.h - for now define it here.
+#define ARRAYSIZE(a) \
+ ((sizeof(a) / sizeof(*(a))) / \
+ static_cast<size_t>(!(sizeof(a) % sizeof(*(a)))))
+#endif
+
bool FLAGS_gpuexec_cuda_driver_inject_init_error = false;
bool FLAGS_gpuexec_cuda_sync_around_driver_calls = false;
bool FLAGS_gpuexec_cuda_device_0_only = false;
diff --git a/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc b/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc
index 52256a7..f69853d 100644
--- a/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc
+++ b/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc
@@ -18,8 +18,12 @@
#if defined(__APPLE__)
#include <mach-o/dyld.h>
#endif
+#if defined(PLATFORM_WINDOWS)
+#include <windows.h>
+#define PATH_MAX MAX_PATH
+#else
#include <unistd.h>
-
+#endif
#include "tensorflow/stream_executor/cuda/cuda_diagnostics.h"
#include "tensorflow/stream_executor/cuda/cuda_driver.h"
#include "tensorflow/stream_executor/cuda/cuda_event.h"
@@ -204,7 +208,12 @@
_NSGetExecutablePath(unresolved_path, &buffer_size);
CHECK_ERR(realpath(unresolved_path, exe_path) ? 1 : -1);
#else
- CHECK_ERR(readlink("/proc/self/exe", exe_path, sizeof(exe_path) - 1));
+#if defined(PLATFORM_WINDOWS)
+ HMODULE hModule = GetModuleHandle(NULL);
+ GetModuleFileName(hModule, exe_path, MAX_PATH);
+#else
+ CHECK_ERR(readlink("/proc/self/exe", exe_path, sizeof(exe_path) - 1));
+#endif
#endif
// Make sure it's null-terminated:
exe_path[sizeof(exe_path) - 1] = 0;
@@ -908,8 +917,10 @@
// could use the file::* utilities).
FILE *file = fopen(filename.c_str(), "r");
if (file == nullptr) {
+#if !defined(PLATFORM_WINDOWS)
LOG(ERROR) << "could not open file to read NUMA node: " << filename
<< "\nYour kernel may have been built without NUMA support.";
+#endif
return kUnknownNumaNode;
}
diff --git a/tensorflow/stream_executor/cuda/cuda_rng.cc b/tensorflow/stream_executor/cuda/cuda_rng.cc
index a0ee677..88b3a4f 100644
--- a/tensorflow/stream_executor/cuda/cuda_rng.cc
+++ b/tensorflow/stream_executor/cuda/cuda_rng.cc
@@ -15,8 +15,6 @@
#include "tensorflow/stream_executor/cuda/cuda_rng.h"
-#include <dlfcn.h>
-
#include "tensorflow/stream_executor/cuda/cuda_activation.h"
#include "tensorflow/stream_executor/cuda/cuda_gpu_executor.h"
#include "tensorflow/stream_executor/cuda/cuda_helpers.h"
diff --git a/tensorflow/stream_executor/dso_loader.cc b/tensorflow/stream_executor/dso_loader.cc
index c9b305a..319f456 100644
--- a/tensorflow/stream_executor/dso_loader.cc
+++ b/tensorflow/stream_executor/dso_loader.cc
@@ -18,13 +18,17 @@
#include "tensorflow/stream_executor/dso_loader.h"
-#include <dlfcn.h>
#include <limits.h>
#if defined(__APPLE__)
#include <mach-o/dyld.h>
#endif
#include <stdlib.h>
+#if defined(PLATFORM_WINDOWS)
+#include <windows.h>
+#define PATH_MAX MAX_PATH
+#else
#include <unistd.h>
+#endif
#include <initializer_list>
#include <vector>
@@ -45,7 +49,7 @@
string GetCudnnVersion() { return TF_CUDNN_VERSION; }
/* static */ port::Status DsoLoader::GetCublasDsoHandle(void** dso_handle) {
- return GetDsoHandle(FindDsoPath(tensorflow::internal::FormatLibraryFileName(
+ return GetDsoHandle(FindDsoPath(port::Env::Default()->FormatLibraryFileName(
"cublas", GetCudaVersion()),
GetCudaLibraryDirPath()),
dso_handle);
@@ -55,35 +59,42 @@
// libcudnn is versioned differently than the other libraries and may have a
// different version number than other CUDA libraries. See b/22397368 for
// some details about the complications surrounding this.
- return GetDsoHandle(FindDsoPath(tensorflow::internal::FormatLibraryFileName(
+ return GetDsoHandle(FindDsoPath(port::Env::Default()->FormatLibraryFileName(
"cudnn", GetCudnnVersion()),
GetCudaLibraryDirPath()),
dso_handle);
}
/* static */ port::Status DsoLoader::GetCufftDsoHandle(void** dso_handle) {
- return GetDsoHandle(FindDsoPath(tensorflow::internal::FormatLibraryFileName(
+ return GetDsoHandle(FindDsoPath(port::Env::Default()->FormatLibraryFileName(
"cufft", GetCudaVersion()),
GetCudaLibraryDirPath()),
dso_handle);
}
/* static */ port::Status DsoLoader::GetCurandDsoHandle(void** dso_handle) {
- return GetDsoHandle(FindDsoPath(tensorflow::internal::FormatLibraryFileName(
+ return GetDsoHandle(FindDsoPath(port::Env::Default()->FormatLibraryFileName(
"curand", GetCudaVersion()),
GetCudaLibraryDirPath()),
dso_handle);
}
/* static */ port::Status DsoLoader::GetLibcudaDsoHandle(void** dso_handle) {
+#if defined(PLATFORM_WINDOWS)
return GetDsoHandle(
- FindDsoPath(tensorflow::internal::FormatLibraryFileName("cuda", "1"),
+ FindDsoPath(port::Env::Default()->FormatLibraryFileName("nvcuda", ""),
GetCudaDriverLibraryPath()),
dso_handle);
+#else
+ return GetDsoHandle(
+ FindDsoPath(port::Env::Default()->FormatLibraryFileName("cuda", "1"),
+ GetCudaDriverLibraryPath()),
+ dso_handle);
+#endif
}
/* static */ port::Status DsoLoader::GetLibcuptiDsoHandle(void** dso_handle) {
- return GetDsoHandle(FindDsoPath(tensorflow::internal::FormatLibraryFileName(
+ return GetDsoHandle(FindDsoPath(port::Env::Default()->FormatLibraryFileName(
"cupti", GetCudaVersion()),
GetCudaCuptiLibraryPath()),
dso_handle);
@@ -101,8 +112,6 @@
return port::Status(port::error::INVALID_ARGUMENT,
"Only LoadKind::kLocal is currently supported");
}
- int dynload_flags =
- RTLD_LAZY | (load_kind == LoadKind::kLocal ? RTLD_LOCAL : RTLD_GLOBAL);
string path_string = path.ToString();
port::Status s =
port::Env::Default()->LoadLibrary(path_string.c_str(), dso_handle);
@@ -125,6 +134,9 @@
char unresolved_path[buffer_size];
_NSGetExecutablePath(unresolved_path, &buffer_size);
CHECK_ERR(realpath(unresolved_path, exe_path) ? 1 : -1);
+#elif defined(PLATFORM_WINDOWS)
+ HMODULE hModule = GetModuleHandle(NULL);
+ GetModuleFileName(hModule, exe_path, MAX_PATH);
#else
CHECK_ERR(readlink("/proc/self/exe", exe_path, sizeof(exe_path) - 1));
#endif
@@ -159,6 +171,9 @@
}
/* static */ bool DsoLoader::TrySymbolicDereference(string* candidate) {
+#if defined(PLATFORM_WINDOWS)
+ return false;
+#else
char buf[PATH_MAX];
char* result = realpath(candidate->c_str(), buf);
if (result == nullptr) {
@@ -168,6 +183,7 @@
<< result << "\"";
*candidate = result;
return true;
+#endif
}
/* static */ string DsoLoader::FindDsoPath(port::StringPiece library_name,
@@ -206,6 +222,8 @@
/* static */ string DsoLoader::GetCudaDriverLibraryPath() {
#if defined(__APPLE__)
return "external/local_config_cuda/cuda/driver/lib";
+#elif defined(PLATFORM_WINDOWS)
+ return "";
#else
return "external/local_config_cuda/cuda/driver/lib64";
#endif
diff --git a/tensorflow/stream_executor/lib/process_state.cc b/tensorflow/stream_executor/lib/process_state.cc
index fcf8847..be4295b 100644
--- a/tensorflow/stream_executor/lib/process_state.cc
+++ b/tensorflow/stream_executor/lib/process_state.cc
@@ -15,8 +15,13 @@
#include "tensorflow/stream_executor/lib/process_state.h"
+#if defined(PLATFORM_WINDOWS)
+#include <direct.h>
+#include <stdlib.h>
+#include <WinSock2.h>
+#else
#include <unistd.h>
-
+#endif
#include <memory>
namespace perftools {
@@ -27,7 +32,7 @@
char hostname[1024];
gethostname(hostname, sizeof hostname);
hostname[sizeof hostname - 1] = 0;
- return hostname;
+ return std::string(hostname);
}
bool GetCurrentDirectory(string* dir) {
diff --git a/tensorflow/stream_executor/lib/static_threadlocal.h b/tensorflow/stream_executor/lib/static_threadlocal.h
index a839420..6e2bd0d 100644
--- a/tensorflow/stream_executor/lib/static_threadlocal.h
+++ b/tensorflow/stream_executor/lib/static_threadlocal.h
@@ -16,6 +16,10 @@
#ifndef TENSORFLOW_STREAM_EXECUTOR_LIB_STATIC_THREADLOCAL_H_
#define TENSORFLOW_STREAM_EXECUTOR_LIB_STATIC_THREADLOCAL_H_
+#ifdef _MSC_VER
+#define __thread __declspec(thread)
+#endif
+
// For POD types in TLS mode, s_obj_VAR is the thread-local variable.
#define SE_STATIC_THREAD_LOCAL_POD(_Type_, _var_) \
static __thread _Type_ s_obj_##_var_; \
diff --git a/tensorflow/tensorboard/backend/server.py b/tensorflow/tensorboard/backend/server.py
index 630d342..f590b5e 100644
--- a/tensorflow/tensorboard/backend/server.py
+++ b/tensorflow/tensorboard/backend/server.py
@@ -81,7 +81,7 @@
else:
run_name = None
path = specification
- if not io_wrapper.IsGCSPath(path):
+ if not (io_wrapper.IsGCSPath(path) or path.startswith('hdfs://')):
path = os.path.realpath(path)
files[path] = run_name
return files
diff --git a/tensorflow/tensorflow.bzl b/tensorflow/tensorflow.bzl
index 6db3978..24fd1d3 100644
--- a/tensorflow/tensorflow.bzl
+++ b/tensorflow/tensorflow.bzl
@@ -563,7 +563,7 @@
for dep in ctx.attr.deps:
inputs += dep.cc.transitive_headers
inputs += ctx.files._swiglib
- swig_include_dirs = set([f.root.path for f in inputs if f.root.path])
+ swig_include_dirs = set(_get_repository_roots(ctx, inputs))
swig_include_dirs += sorted([f.dirname for f in ctx.files._swiglib])
args = ["-c++",
"-python",
@@ -616,6 +616,35 @@
implementation = _py_wrap_cc_impl,
)
+def _get_repository_roots(ctx, files):
+ """Returns abnormal root directories under which files reside.
+
+ When running a ctx.action, source files within the main repository are all
+ relative to the current directory; however, files that are generated or exist
+ in remote repositories will have their root directory be a subdirectory,
+ e.g. bazel-out/local-fastbuild/genfiles/external/jpeg_archive. This function
+ returns the set of these devious directories, ranked and sorted by popularity
+ in order to hopefully minimize the number of I/O system calls within the
+ compiler, because includes have quadratic complexity.
+ """
+ result = {}
+ for f in files:
+ root = f.root.path
+ if root:
+ if root not in result:
+ result[root] = 0
+ result[root] -= 1
+ work = f.owner.workspace_root
+ if work:
+ if root:
+ root += "/"
+ root += work
+ if root:
+ if root not in result:
+ result[root] = 0
+ result[root] -= 1
+ return [k for v, k in sorted([(v, k) for k, v in result.items()])]
+
# Bazel rule for collecting the header files that a target depends on.
def _transitive_hdrs_impl(ctx):
outputs = set()
diff --git a/tensorflow/tools/ci_build/builds/test_installation.sh b/tensorflow/tools/ci_build/builds/test_installation.sh
index 5d7d6ec..09b2ae5 100755
--- a/tensorflow/tools/ci_build/builds/test_installation.sh
+++ b/tensorflow/tools/ci_build/builds/test_installation.sh
@@ -47,10 +47,6 @@
# TF_BUILD_BAZEL_CLEAN, if set to any non-empty and non-0 value, directs the
# script to perform bazel clean prior to main build and test steps.
#
-# TF_BUILD_SERIAL_INSTALL_TESTS, if set to any non-empty and non-0 value,
-# will force the Python install tests to run serially, overriding than the
-# concurrent testing behavior.
-#
# TF_GPU_COUNT, Set the number of GPUs in the system. We run only this many
# concurrent tests when running GPU tests.
#
@@ -411,21 +407,21 @@
FAILED_TESTS=""
FAILED_TEST_LOGS=""
-N_JOBS=$(grep -c ^processor /proc/cpuinfo)
-if [[ -z ${N_JOBS} ]]; then
- # Try the Mac way of getting number of CPUs
- N_JOBS=$(sysctl -n hw.ncpu)
-fi
-
-if [[ -z ${N_JOBS} ]]; then
- N_JOBS=8
- echo "Cannot determine the number of processors"
- echo "Using default concurrent job counter ${N_JOBS}"
-fi
-
-if [[ ! -z "${TF_BUILD_SERIAL_INSTALL_TESTS}" ]] &&
- [[ "${TF_BUILD_SERIAL_INSTALL_TESTS}" != "0" ]]; then
+if [[ "${IS_GPU}" == "1" ]]; then
N_JOBS=$TF_GPU_COUNT
+else
+ N_JOBS=$(grep -c ^processor /proc/cpuinfo)
+ if [[ -z ${N_JOBS} ]]; then
+ # Try the Mac way of getting number of CPUs
+ N_JOBS=$(sysctl -n hw.ncpu)
+ fi
+
+ # If still cannot determine the number of CPUs, pick 8.
+ if [[ -z ${N_JOBS} ]]; then
+ N_JOBS=8
+ echo "Cannot determine the number of processors"
+ echo "Using default concurrent job counter ${N_JOBS}"
+ fi
fi
echo "Running Python tests-on-install with ${N_JOBS} concurrent jobs..."
@@ -485,9 +481,14 @@
TEST_LOGS="${TEST_LOGS} ${TEST_LOG}"
# Launch test asynchronously
- "${SCRIPT_DIR}/../gpu_build/parallel_gpu_execute.sh" \
+ if [[ "${IS_GPU}" == "1" ]]; then
+ "${SCRIPT_DIR}/../gpu_build/parallel_gpu_execute.sh" \
+ "${SCRIPT_DIR}/py_test_delegate.sh" \
+ "${PYTHON_BIN_PATH}" "${PY_TEST_DIR}/${TEST_BASENAME}" "${TEST_LOG}" &
+ else
"${SCRIPT_DIR}/py_test_delegate.sh" \
- "${PYTHON_BIN_PATH}" "${PY_TEST_DIR}/${TEST_BASENAME}" "${TEST_LOG}" &
+ "${PYTHON_BIN_PATH}" "${PY_TEST_DIR}/${TEST_BASENAME}" "${TEST_LOG}" &
+ fi
if [[ "${TEST_COUNTER}" -ge "${N_PAR_TESTS}" ]]; then
# Run in exclusive mode
diff --git a/tensorflow/tools/ci_build/builds/test_tutorials.sh b/tensorflow/tools/ci_build/builds/test_tutorials.sh
old mode 100644
new mode 100755
index aafa76b..aaad47c
--- a/tensorflow/tools/ci_build/builds/test_tutorials.sh
+++ b/tensorflow/tools/ci_build/builds/test_tutorials.sh
@@ -146,7 +146,7 @@
run_in_directory "${TEST_DIR}" "${LOG_FILE}" \
tensorflow/examples/tutorials/mnist/mnist_with_summaries.py \
- --data_dir="${TUT_TEST_DATA_DIR}/mnist" --summaries_dir="${SUMMARIES_DIR}"
+ --data_dir="${TUT_TEST_DATA_DIR}/mnist" --log_dir="${SUMMARIES_DIR}"
# Verify final accuracy
FINAL_ACCURACY=$(grep "Accuracy at step" "${LOG_FILE}" \
diff --git a/tensorflow/tools/ci_build/ci_build.sh b/tensorflow/tools/ci_build/ci_build.sh
index 0f165cd..54587ef 100755
--- a/tensorflow/tools/ci_build/ci_build.sh
+++ b/tensorflow/tools/ci_build/ci_build.sh
@@ -103,10 +103,8 @@
BUILD_TAG="${BUILD_TAG:-tf_ci}"
# Add extra params for cuda devices and libraries for GPU container.
-if [ "${CONTAINER_TYPE}" == "gpu" ]; then
- # GPU pip tests-on-install concurrency is limited to the number of GPUs.
- GPU_EXTRA_PARAMS="${GPU_EXTRA_PARAMS} -e TF_BUILD_SERIAL_INSTALL_TESTS=1"
-else
+# And clear them if we are not building for GPU.
+if [ "${CONTAINER_TYPE}" != "gpu" ]; then
GPU_EXTRA_PARAMS=""
fi
diff --git a/tensorflow/tools/dist_test/build_server.sh b/tensorflow/tools/dist_test/build_server.sh
index 178fba8..878fabd 100755
--- a/tensorflow/tools/dist_test/build_server.sh
+++ b/tensorflow/tools/dist_test/build_server.sh
@@ -16,7 +16,14 @@
#
# Builds the test server for distributed (GRPC) TensorFlow
#
-# Usage: build_server.sh <docker_image_name> [--test]
+# Usage: build_server.sh <docker_image_name> <whl_url> [--test]
+#
+# Arguments:
+# docker_image_name: Name of the docker image to build.
+# E.g.: tensorflow/tf_grpc_test_server:0.11.0rc1
+#
+# whl_url: URL from which the TensorFlow whl file will be downloaded.
+# E.g.: https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=cpu-slave/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.11.0rc1-cp27-none-linux_x86_64.whl
#
# The optional flag --test lets the script to use the Dockerfile for the
# testing GRPC server. Without the flag, the script will build the non-test
@@ -33,22 +40,35 @@
}
# Check arguments
-if [[ $# != 1 ]] && [[ $# != 2 ]]; then
- die "Usage: $0 <docker_image_name> [--test]"
+if [[ $# -lt 2 ]]; then
+ die "Usage: $0 <docker_image_name> <whl_url> [--test]"
fi
DOCKER_IMG_NAME=$1
-shift
+WHL_URL=$2
+shift 2
# Current script directory
DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
-DOCKER_FILE="${DIR}/server/Dockerfile"
+BUILD_DIR=$(mktemp -d)
+echo ""
+echo "Using whl file URL: ${WHL_URL}"
+echo "Building in temporary directory: ${BUILD_DIR}"
+
+cp -r ${DIR}/* "${BUILD_DIR}"/ || \
+ die "Failed to copy files to ${BUILD_DIR}"
+
+DOCKER_FILE="${BUILD_DIR}/server/Dockerfile"
if [[ $1 == "--test" ]]; then
- DOCKER_FILE="${DIR}/server/Dockerfile.test"
+ DOCKER_FILE="${BUILD_DIR}/server/Dockerfile.test"
fi
echo "Using Docker file: ${DOCKER_FILE}"
+# Download whl file into the build context directory.
+wget -P "${BUILD_DIR}" ${WHL_URL} || \
+ die "Failed to download tensorflow whl file from URL: ${WHL_URL}"
+
if [[ ! -f "${DOCKER_FILE}" ]]; then
die "ERROR: Unable to find dockerfile: ${DOCKER_FILE}"
fi
@@ -56,5 +76,8 @@
# Call docker build
docker build --no-cache -t "${DOCKER_IMG_NAME}" \
- -f "${DOCKER_FILE}" \
- "${DIR}"
+ -f "${DOCKER_FILE}" "${BUILD_DIR}" || \
+ die "Failed to build docker image: ${DOCKER_IMG_NAME}"
+
+# Clean up docker build context directory.
+rm -rf "${BUILD_DIR}"
diff --git a/tensorflow/tools/dist_test/server/Dockerfile b/tensorflow/tools/dist_test/server/Dockerfile
index 9cc61a8..4b13b81 100644
--- a/tensorflow/tools/dist_test/server/Dockerfile
+++ b/tensorflow/tools/dist_test/server/Dockerfile
@@ -34,9 +34,10 @@
python get-pip.py && \
rm get-pip.py
-# Install TensorFlow CPU version from nightly build
-RUN pip --no-cache-dir install \
- https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=cpu-slave/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.11.0rc0-cp27-none-linux_x86_64.whl
+# Install TensorFlow wheel
+COPY tensorflow-*.whl /
+RUN pip install /tensorflow-*.whl && \
+ rm -f /tensorflow-*.whl
# Copy files, including the GRPC server binary at
# server/grpc_tensorflow_server.py
diff --git a/tensorflow/tools/dist_test/server/Dockerfile.test b/tensorflow/tools/dist_test/server/Dockerfile.test
index 5bafa29..22438f3 100644
--- a/tensorflow/tools/dist_test/server/Dockerfile.test
+++ b/tensorflow/tools/dist_test/server/Dockerfile.test
@@ -40,9 +40,10 @@
# Install python panda for the census wide&deep test
RUN pip install --upgrade pandas==0.18.1
-# Install TensorFlow CPU version.
-RUN pip --no-cache-dir install \
- https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=cpu-slave/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.11.0rc0-cp27-none-linux_x86_64.whl
+# Install TensorFlow wheel
+COPY tensorflow-*.whl /
+RUN pip install /tensorflow-*.whl && \
+ rm -f /tensorflow-*.whl
# Copy files, including the GRPC server binary at
# server/grpc_tensorflow_server.py
diff --git a/tensorflow/tools/docker/Dockerfile b/tensorflow/tools/docker/Dockerfile
index bd0b4cc..a8f5f26 100644
--- a/tensorflow/tools/docker/Dockerfile
+++ b/tensorflow/tools/docker/Dockerfile
@@ -33,7 +33,7 @@
&& \
python -m ipykernel.kernelspec
-ENV TENSORFLOW_VERSION 0.11.0rc0
+ENV TENSORFLOW_VERSION 0.11.0rc1
# --- DO NOT EDIT OR DELETE BETWEEN THE LINES --- #
# These lines will be edited automatically by parameterized_docker_build.sh. #
diff --git a/tensorflow/tools/docker/Dockerfile.gpu b/tensorflow/tools/docker/Dockerfile.gpu
index 9db6b73..9ad57d6 100644
--- a/tensorflow/tools/docker/Dockerfile.gpu
+++ b/tensorflow/tools/docker/Dockerfile.gpu
@@ -33,7 +33,7 @@
&& \
python -m ipykernel.kernelspec
-ENV TENSORFLOW_VERSION 0.11.0rc0
+ENV TENSORFLOW_VERSION 0.11.0rc1
# --- DO NOT EDIT OR DELETE BETWEEN THE LINES --- #
# These lines will be edited automatically by parameterized_docker_build.sh. #
diff --git a/tensorflow/tools/gcs_test/Dockerfile b/tensorflow/tools/gcs_test/Dockerfile
index 0abe3d6..782a63f 100644
--- a/tensorflow/tools/gcs_test/Dockerfile
+++ b/tensorflow/tools/gcs_test/Dockerfile
@@ -17,7 +17,7 @@
# Install nightly TensorFlow pip
RUN pip install \
- https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=cpu-slave/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.11.0rc0-cp27-none-linux_x86_64.whl
+ https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=cpu-slave/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.11.0rc1-cp27-none-linux_x86_64.whl
# Copy test files
RUN mkdir -p /gcs-smoke/python
diff --git a/tensorflow/tools/gcs_test/gcs_smoke_wrapper.sh b/tensorflow/tools/gcs_test/gcs_smoke_wrapper.sh
index 68800d6..2ce0fb3 100755
--- a/tensorflow/tools/gcs_test/gcs_smoke_wrapper.sh
+++ b/tensorflow/tools/gcs_test/gcs_smoke_wrapper.sh
@@ -81,7 +81,6 @@
cat ${LOG_FILE}
echo ""
-
# Clean up the newly created tfrecord file in GCS bucket.
# First, activate gcloud service account
"${GCLOUD_BIN}" auth activate-service-account \
@@ -96,13 +95,3 @@
"${GSUTIL_BIN}" rm "${NEW_TFREC_URL}" && \
echo "Cleaned up new tfrecord file in GCS: ${NEW_TFREC_URL}" || \
die "FAIL: Unable to clean up new tfrecord file in GCS: ${NEW_TFREC_URL}"
-
-# Also clean up newly created GCS dir.
-NEW_DIR_URL=$(grep "Creating dir" "${LOG_FILE}" | \
- awk '{print $NF}')
-if [[ -z ${NEW_DIR_URL} ]]; then
- die "FAIL: Unable to determine the URL to the new directory created in GCS."
-fi
-"${GSUTIL_BIN}" rm -r "${NEW_DIR_URL}" && \
- echo "Cleaned up new directory created in GCS: ${NEW_DIR_URL}" || \
- die "FAIL: Unable to clean up new directory created in GCS: ${NEW_DIR_URL}"
diff --git a/tensorflow/tools/gcs_test/python/gcs_smoke.py b/tensorflow/tools/gcs_test/python/gcs_smoke.py
index 0e0018f..23f45a9 100644
--- a/tensorflow/tools/gcs_test/python/gcs_smoke.py
+++ b/tensorflow/tools/gcs_test/python/gcs_smoke.py
@@ -35,7 +35,6 @@
FLAGS = flags.FLAGS
-
def create_examples(num_examples, input_mean):
"""Create ExampleProto's containg data."""
ids = np.arange(num_examples).reshape([num_examples, 1])
@@ -64,12 +63,48 @@
print("%s directory exists: %s" % (dir_name, dir_exists))
# List contents of just created directory.
- starttime = int(round(time.time() * 1000))
print("Listing directory %s." % dir_name)
+ starttime = int(round(time.time() * 1000))
print(file_io.list_directory(dir_name))
elapsed = int(round(time.time() * 1000)) - starttime
print("Listed directory %s in %s milliseconds" % (dir_name, elapsed))
+ # Delete directory.
+ print("Deleting directory %s." % dir_name)
+ starttime = int(round(time.time() * 1000))
+ file_io.delete_recursively(dir_name)
+ elapsed = int(round(time.time() * 1000)) - starttime
+ print("Deleted directory %s in %s milliseconds" % (dir_name, elapsed))
+
+def create_object_test():
+ """Verifies file_io's object manipulation methods ."""
+ starttime = int(round(time.time() * 1000))
+ dir_name = "%s/tf_gcs_test_%s" % (FLAGS.gcs_bucket_url, starttime)
+ print("Creating dir %s." % dir_name)
+ file_io.create_dir(dir_name)
+
+ # Create a file in this directory.
+ file_name = "%s/test_file.txt" % dir_name
+ print("Creating file %s." % file_name)
+ file_io.write_string_to_file(file_name, "test file creation.")
+
+ list_files_pattern = "%s/test_file*.txt" % dir_name
+ print("Getting files matching pattern %s." % list_files_pattern)
+ files_list = file_io.get_matching_files(list_files_pattern)
+ print(files_list)
+
+ assert len(files_list) == 1
+ assert files_list[0] == file_name
+
+ # Cleanup test files.
+ print("Deleting file %s." % file_name)
+ file_io.delete_file(file_name)
+
+ # Delete directory.
+ print("Deleting directory %s." % dir_name)
+ file_io.delete_recursively(dir_name)
+
+
if __name__ == "__main__":
# Sanity check on the GCS bucket URL.
if not FLAGS.gcs_bucket_url or not FLAGS.gcs_bucket_url.startswith("gs://"):
@@ -132,4 +167,5 @@
print("Successfully caught the expected OutOfRangeError while "
"reading one more record than is available")
- create_dir_test()
+ create_dir_test()
+ create_object_test()
diff --git a/tensorflow/tools/git/gen_git_source.py b/tensorflow/tools/git/gen_git_source.py
index db9de3f..267bcad 100755
--- a/tensorflow/tools/git/gen_git_source.py
+++ b/tensorflow/tools/git/gen_git_source.py
@@ -147,7 +147,7 @@
"""
unknown_label = b"unknown"
try:
- val = subprocess.check_output(["git", "-C", git_base_path, "describe",
+ val = subprocess.check_output(["git", str("--git-dir="+git_base_path+"/.git"), str("--work-tree="+git_base_path), "describe",
"--long", "--dirty", "--tags"]).strip()
return val if val else unknown_label
except subprocess.CalledProcessError:
diff --git a/tensorflow/tools/pip_package/build_pip_package.sh b/tensorflow/tools/pip_package/build_pip_package.sh
index 34b6a58..2539ad4 100755
--- a/tensorflow/tools/pip_package/build_pip_package.sh
+++ b/tensorflow/tools/pip_package/build_pip_package.sh
@@ -107,7 +107,8 @@
mkdir -p ${TMPDIR}/third_party
pushd ${RUNFILES%org_tensorflow}
for header in $(find protobuf -name \*.h); do
- cp --parents "$header" ${TMPDIR}/google;
+ mkdir -p "${TMPDIR}/google/$(dirname ${header})"
+ cp "$header" "${TMPDIR}/google/$(dirname ${header})/"
done
popd
cp -R $RUNFILES/third_party/eigen3 ${TMPDIR}/third_party
diff --git a/tensorflow/tools/pip_package/setup.py b/tensorflow/tools/pip_package/setup.py
index e458f12..9b475e5 100644
--- a/tensorflow/tools/pip_package/setup.py
+++ b/tensorflow/tools/pip_package/setup.py
@@ -26,7 +26,7 @@
from setuptools.command.install import install as InstallCommandBase
from setuptools.dist import Distribution
-_VERSION = '0.11.0rc0'
+_VERSION = '0.11.0rc1'
REQUIRED_PACKAGES = [
'numpy >= 1.11.0',
diff --git a/tensorflow/tools/swig/.gitignore b/tensorflow/tools/swig/.gitignore
new file mode 100644
index 0000000..a14f886
--- /dev/null
+++ b/tensorflow/tools/swig/.gitignore
@@ -0,0 +1 @@
+swig_path
diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl
index b13e6c7..4be2490 100644
--- a/tensorflow/workspace.bzl
+++ b/tensorflow/workspace.bzl
@@ -98,9 +98,9 @@
native.http_archive(
name = "protobuf",
- url = "http://github.com/google/protobuf/archive/c2b3e70efd2038a54ef8973771ac58192885125e.tar.gz",
- sha256 = "eafc1bc4c27970d62effe64ba6610823fdd66711f440d8ca4a168167786a2fcb",
- strip_prefix = "protobuf-c2b3e70efd2038a54ef8973771ac58192885125e",
+ url = "http://github.com/google/protobuf/archive/008b5a228b37c054f46ba478ccafa5e855cb16db.tar.gz",
+ sha256 = "2737ad055eb8a9bc63ed068e32c4ea280b62d8236578cb4d4120eb5543f759ab",
+ strip_prefix = "protobuf-008b5a228b37c054f46ba478ccafa5e855cb16db",
)
native.new_http_archive(
diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/Tensor b/third_party/eigen3/unsupported/Eigen/CXX11/Tensor
index 7fafd2a..00d2e7c 100644
--- a/third_party/eigen3/unsupported/Eigen/CXX11/Tensor
+++ b/third_party/eigen3/unsupported/Eigen/CXX11/Tensor
@@ -1,3 +1,6 @@
+#ifdef _WIN32
+#define sleep(seconds) Sleep(1000*seconds)
+#endif // _WIN32
#include "unsupported/Eigen/CXX11/Tensor"
#ifdef _WIN32
diff --git a/util/python/python_config.sh b/util/python/python_config.sh
index 50f6398..8a780c8 100755
--- a/util/python/python_config.sh
+++ b/util/python/python_config.sh
@@ -113,29 +113,33 @@
echo -e "\n\nERROR: Problem getting python include path. Is distutils installed?"
exit 1
fi
- local python_lib_path
- # Split python_path into an array of paths, this allows path containing spaces
- IFS=','
- python_lib_path=($(python_path))
- unset IFS
- echo "Found possible Python library paths:"
- for x in "${python_lib_path[@]}"; do
- echo " $x"
- done
- set -- "${python_lib_path[@]}"
- echo "Please input the desired Python library path to use. Default is ["$1"]"
- read b || true
- if [ "$b" == "" ]; then
- python_lib="$(default_python_path "${python_lib_path[0]}")"
- echo $python_lib
- else
- if test -d "$b" -a -x "$b"; then
- python_lib="$b"
+
+ if [ -z "$PYTHON_LIB_PATH" ]; then
+ local python_lib_path
+ # Split python_path into an array of paths, this allows path containing spaces
+ IFS=','
+ python_lib_path=($(python_path))
+ unset IFS
+ echo "Found possible Python library paths:"
+ for x in "${python_lib_path[@]}"; do
+ echo " $x"
+ done
+ set -- "${python_lib_path[@]}"
+ echo "Please input the desired Python library path to use. Default is ["$1"]"
+ read b || true
+ if [ "$b" == "" ]; then
+ PYTHON_LIB_PATH="$(default_python_path "${python_lib_path[0]}")"
+ echo $PYTHON_LIB_PATH
else
- echo -e "\n\nERROR: The path you have entered does not exist."
- exit 1
+ PYTHON_LIB_PATH="$b"
fi
fi
+ if test -d "$PYTHON_LIB_PATH" -a -x "$PYTHON_LIB_PATH"; then
+ python_lib="$PYTHON_LIB_PATH"
+ else
+ echo -e "\n\nERROR: Invalid python library path: ${PYTHON_LIB_PATH}."
+ exit 1
+ fi
local numpy_include=$("${PYTHON_BIN_PATH}" -c 'from __future__ import print_function; import numpy; print(numpy.get_include());')
if [ "$numpy_include" == "" ]; then