Merge changes from github.
Change: 132733397
diff --git a/ISSUE_TEMPLATE.md b/ISSUE_TEMPLATE.md
index f323d23..af76188 100644
--- a/ISSUE_TEMPLATE.md
+++ b/ISSUE_TEMPLATE.md
@@ -1,4 +1,5 @@
-GitHub issues are for bugs / installation problems / feature requests.  
+NOTE: Only file GitHub issues for bugs and feature requests.  All other topics will be closed.
+
 For general support from the community, see [StackOverflow](https://stackoverflow.com/questions/tagged/tensorflow).
 To make bugs and feature requests more easy to find and organize, we close issues that are deemed
 out of scope for GitHub Issues and point people to StackOverflow.
@@ -7,6 +8,8 @@
 The more information you provide, the more easily we will be able to offer
 help and advice.
 
+### What related GitHub issues or StackOverflow threads have you found by searching the web for your problem?
+
 ### Environment info
 Operating System:
 
@@ -15,7 +18,7 @@
 
 If installed from binary pip package, provide:
 
-1. Which pip package you installed.
+1. A link to the pip package you installed:
 2. The output from `python -c "import tensorflow; print(tensorflow.__version__)"`.
 
 If installed from source, provide 
@@ -23,13 +26,11 @@
 1. The commit hash (`git rev-parse HEAD`)
 2. The output of `bazel version`
 
-### Steps to reproduce
-1.
-2.
-3.
+### If possible, provide a minimal reproducible example (we usually don't have time to read hundreds of lines of your code)
 
-### What have you tried?
-1.
+
+### What other solutions have you tried?
+
 
 ### Logs or other output that would be helpful
-(If logs are large, please upload as attachment).
+(If logs are large, please upload as attachment or provide link).
diff --git a/RELEASE.md b/RELEASE.md
index 503f831..212e296 100644
--- a/RELEASE.md
+++ b/RELEASE.md
@@ -125,7 +125,7 @@
 * Allow building against cuDNN 5 (not incl. RNN/LSTM support) 
 * Added instructions and binaries for ProtoBuf library with fast serialization and without 64MB limit
 * Added special functions
-* `bool`-strictness: Tensors have to be explictly compared to `None`
+* `bool`-strictness: Tensors have to be explicitly compared to `None`
 * Shape strictness: all fed values must have a shape that is compatible with the tensor they are replacing
 * Exposed `tf.while_loop` (deprecated `control_flow_ops.While`)
 * run() now takes RunOptions and RunMetadata, which enable timing stats
diff --git a/tensorflow/contrib/cmake/CMakeLists.txt b/tensorflow/contrib/cmake/CMakeLists.txt
index 4fe5960..3556e14 100644
--- a/tensorflow/contrib/cmake/CMakeLists.txt
+++ b/tensorflow/contrib/cmake/CMakeLists.txt
@@ -38,6 +38,8 @@
      CACHE PATH "Location where external projects will be downloaded.")
 mark_as_advanced(DOWNLOAD_LOCATION)
 
+set(CMAKE_POSITION_INDEPENDENT_CODE ON)
+
 # External dependencies
 include(gif)
 include(png)
@@ -48,6 +50,8 @@
 include(boringssl)
 include(farmhash)
 include(highwayhash)
+include(protobuf)
+include(grpc)
 
 # Let's get to work!
 include(tf_core_framework.cmake)
@@ -57,10 +61,12 @@
 include(tf_models.cmake)
 include(tf_core_ops.cmake)
 include(tf_core_direct_session.cmake)
+include(tf_core_distributed_runtime.cmake)
 include(tf_core_kernels.cmake)
 include(tf_cc_ops.cmake)
 include(tf_tutorials.cmake)
 include(tf_tools.cmake)
+include(tf_python.cmake)
 
 if (tensorflow_BUILD_TESTS)
   include(tests.cmake)
diff --git a/tensorflow/contrib/cmake/external/boringssl.cmake b/tensorflow/contrib/cmake/external/boringssl.cmake
index ca86e1f..7d6245f 100644
--- a/tensorflow/contrib/cmake/external/boringssl.cmake
+++ b/tensorflow/contrib/cmake/external/boringssl.cmake
@@ -27,5 +27,6 @@
     CMAKE_CACHE_ARGS
         -DCMAKE_BUILD_TYPE:STRING=Release
         -DCMAKE_VERBOSE_MAKEFILE:BOOL=OFF
+        -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
 )
 
diff --git a/tensorflow/contrib/cmake/external/farmhash.cmake b/tensorflow/contrib/cmake/external/farmhash.cmake
index 3cf8d5a..11397fe 100644
--- a/tensorflow/contrib/cmake/external/farmhash.cmake
+++ b/tensorflow/contrib/cmake/external/farmhash.cmake
@@ -27,6 +27,7 @@
         ${farmhash_BUILD}/configure
         --prefix=${farmhash_INSTALL}
         --enable-shared=yes
+        CXXFLAGS=-fPIC
 )
 
 # put farmhash includes in the directory where they are expected
diff --git a/tensorflow/contrib/cmake/external/gif.cmake b/tensorflow/contrib/cmake/external/gif.cmake
index 9fdaeec..32c6369 100644
--- a/tensorflow/contrib/cmake/external/gif.cmake
+++ b/tensorflow/contrib/cmake/external/gif.cmake
@@ -10,6 +10,8 @@
     "${gif_INSTALL}/include/gif_lib.h"
 )
 
+set(ENV{CFLAGS} "$ENV{CFLAGS} -fPIC")
+
 ExternalProject_Add(gif
     PREFIX gif
     URL ${gif_URL}
@@ -20,6 +22,7 @@
     INSTALL_COMMAND $(MAKE) install
     CONFIGURE_COMMAND
     ${CMAKE_CURRENT_BINARY_DIR}/gif/src/gif/configure
+    --with-pic
     --prefix=${gif_INSTALL}
     --enable-shared=yes
 )
diff --git a/tensorflow/contrib/cmake/external/grpc.cmake b/tensorflow/contrib/cmake/external/grpc.cmake
new file mode 100644
index 0000000..30ddcad
--- /dev/null
+++ b/tensorflow/contrib/cmake/external/grpc.cmake
@@ -0,0 +1,27 @@
+include (ExternalProject)
+
+set(GRPC_INCLUDE_DIRS ${CMAKE_CURRENT_BINARY_DIR}/grpc/src/grpc/include)
+set(GRPC_URL https://github.com/grpc/grpc.git)
+set(GRPC_BUILD ${CMAKE_CURRENT_BINARY_DIR}/grpc/src/grpc)
+set(GRPC_TAG 3bc78cd0b5bd784a235c01612d634b1ec5f8fb97)
+set(GRPC_LIBRARIES
+    ${CMAKE_CURRENT_BINARY_DIR}/grpc/src/grpc/libgrpc++_unsecure.a
+    ${CMAKE_CURRENT_BINARY_DIR}/grpc/src/grpc/libgrpc_unsecure.a
+    ${CMAKE_CURRENT_BINARY_DIR}/grpc/src/grpc/libgpr.a)
+
+ExternalProject_Add(grpc
+    PREFIX grpc
+    DEPENDS protobuf
+    GIT_REPOSITORY ${GRPC_URL}
+    GIT_TAG ${GRPC_TAG}
+    DOWNLOAD_DIR "${DOWNLOAD_LOCATION}"
+    BUILD_IN_SOURCE 1
+    PATCH_COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_SOURCE_DIR}/patches/grpc/CMakeLists.txt ${GRPC_BUILD}
+    INSTALL_COMMAND ""
+    CMAKE_CACHE_ARGS
+        -DCMAKE_BUILD_TYPE:STRING=Release
+        -DCMAKE_VERBOSE_MAKEFILE:BOOL=OFF
+        -DPROTOBUF_INCLUDE_DIRS:STRING=${PROTOBUF_INCLUDE_DIRS}
+        -DPROTOBUF_LIBRARIES:STRING=${PROTOBUF_LIBRARIES}
+)
+
diff --git a/tensorflow/contrib/cmake/external/highwayhash.cmake b/tensorflow/contrib/cmake/external/highwayhash.cmake
index 2213534..fec44c2 100644
--- a/tensorflow/contrib/cmake/external/highwayhash.cmake
+++ b/tensorflow/contrib/cmake/external/highwayhash.cmake
@@ -19,7 +19,7 @@
 ExternalProject_Add(highwayhash
     PREFIX highwayhash
     GIT_REPOSITORY ${highwayhash_URL}
-    GIT_TAG ${highwayhash_HASH}
+    GIT_TAG ${highwayhash_TAG}
     DOWNLOAD_DIR "${DOWNLOAD_LOCATION}"
     BUILD_IN_SOURCE 1
     BUILD_COMMAND $(MAKE)
diff --git a/tensorflow/contrib/cmake/external/jpeg.cmake b/tensorflow/contrib/cmake/external/jpeg.cmake
index 4b6b648..edef25a 100644
--- a/tensorflow/contrib/cmake/external/jpeg.cmake
+++ b/tensorflow/contrib/cmake/external/jpeg.cmake
@@ -44,7 +44,6 @@
     )
 
 else()
-
     ExternalProject_Add(jpeg
         PREFIX jpeg
         URL ${jpeg_URL}
@@ -57,6 +56,7 @@
             ${jpeg_BUILD}/configure
             --prefix=${jpeg_INSTALL}
             --enable-shared=yes
+            CFLAGS=-fPIC
     )
   
 endif()
diff --git a/tensorflow/contrib/cmake/external/jsoncpp.cmake b/tensorflow/contrib/cmake/external/jsoncpp.cmake
index b392f07..22d8139 100644
--- a/tensorflow/contrib/cmake/external/jsoncpp.cmake
+++ b/tensorflow/contrib/cmake/external/jsoncpp.cmake
@@ -25,5 +25,6 @@
     CMAKE_CACHE_ARGS
         -DCMAKE_BUILD_TYPE:STRING=Release
         -DCMAKE_VERBOSE_MAKEFILE:BOOL=OFF
+        -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
 )
 
diff --git a/tensorflow/contrib/cmake/external/png.cmake b/tensorflow/contrib/cmake/external/png.cmake
index ca36334..56d6ae7 100644
--- a/tensorflow/contrib/cmake/external/png.cmake
+++ b/tensorflow/contrib/cmake/external/png.cmake
@@ -22,6 +22,7 @@
         -DCMAKE_BUILD_TYPE:STRING=Release
         -DCMAKE_VERBOSE_MAKEFILE:BOOL=OFF
         -DCMAKE_INSTALL_PREFIX:STRING=${png_INSTALL}
+        -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
 )
 
 ## put png includes in the directory where they are expected
diff --git a/tensorflow/contrib/cmake/external/protobuf.cmake b/tensorflow/contrib/cmake/external/protobuf.cmake
new file mode 100644
index 0000000..dc74e9a
--- /dev/null
+++ b/tensorflow/contrib/cmake/external/protobuf.cmake
@@ -0,0 +1,22 @@
+include (ExternalProject)
+
+set(PROTOBUF_INCLUDE_DIRS ${CMAKE_CURRENT_BINARY_DIR}/protobuf/src/protobuf/src)
+set(PROTOBUF_URL https://github.com/google/protobuf/releases/download/v3.0.0/protobuf-cpp-3.0.0.zip)
+set(PROTOBUF_HASH SHA256=e886ea7d08267fc3d866ac42d6dd7461ae11c491836adef6f34c04cad0be3078)
+set(PROTOBUF_LIBRARIES ${CMAKE_CURRENT_BINARY_DIR}/protobuf/src/protobuf/libprotobuf.a)
+set(PROTOBUF_PROTOC_EXECUTABLE ${CMAKE_CURRENT_BINARY_DIR}/protobuf/src/protobuf/protoc)
+
+ExternalProject_Add(protobuf
+    PREFIX protobuf
+    URL ${PROTOBUF_URL}
+    DOWNLOAD_DIR "${DOWNLOAD_LOCATION}"
+    BUILD_IN_SOURCE 1
+    SOURCE_DIR ${CMAKE_CURRENT_BINARY_DIR}/protobuf/src/protobuf
+    CONFIGURE_COMMAND ${CMAKE_COMMAND} cmake/ -Dprotobuf_BUILD_TESTS=OFF -DCMAKE_POSITION_INDEPENDENT_CODE=ON
+    INSTALL_COMMAND ""
+    CMAKE_CACHE_ARGS
+        -DCMAKE_BUILD_TYPE:STRING=Release
+        -DCMAKE_VERBOSE_MAKEFILE:BOOL=OFF
+        -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
+)
+
diff --git a/tensorflow/contrib/cmake/external/re2.cmake b/tensorflow/contrib/cmake/external/re2.cmake
index 2a193c9..1da2ff6 100644
--- a/tensorflow/contrib/cmake/external/re2.cmake
+++ b/tensorflow/contrib/cmake/external/re2.cmake
@@ -14,6 +14,8 @@
 # For the rest, we'll just add the build dir as an include dir.
 set(re2_HEADERS
     "${re2_BUILD}/re2/re2.h"
+    "${re2_BUILD}/re2/stringpiece.h"
+    "${re2_BUILD}/re2/variadic_function.h"
 )
 
 ExternalProject_Add(re2
@@ -26,11 +28,12 @@
     CMAKE_CACHE_ARGS
         -DCMAKE_BUILD_TYPE:STRING=Release
         -DCMAKE_VERBOSE_MAKEFILE:BOOL=OFF
+        -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
 )
 
 ## put re2 includes in the directory where they are expected
 add_custom_target(re2_create_destination_dir
-    COMMAND ${CMAKE_COMMAND} -E make_directory ${re2_INCLUDE_DIR}
+    COMMAND ${CMAKE_COMMAND} -E make_directory ${re2_INCLUDE_DIR}/re2
     DEPENDS re2)
 
 add_custom_target(re2_copy_headers_to_destination
@@ -38,7 +41,7 @@
 
 foreach(header_file ${re2_HEADERS})
     add_custom_command(TARGET re2_copy_headers_to_destination PRE_BUILD
-    COMMAND ${CMAKE_COMMAND} -E copy ${header_file} ${re2_INCLUDE_DIR})
+    COMMAND ${CMAKE_COMMAND} -E copy ${header_file} ${re2_INCLUDE_DIR}/re2)
 endforeach()
 
 ADD_LIBRARY(re2_lib STATIC IMPORTED
diff --git a/tensorflow/contrib/cmake/patches/grpc/CMakeLists.txt b/tensorflow/contrib/cmake/patches/grpc/CMakeLists.txt
new file mode 100644
index 0000000..6cc2e4e
--- /dev/null
+++ b/tensorflow/contrib/cmake/patches/grpc/CMakeLists.txt
@@ -0,0 +1,315 @@
+# GRPC global cmake file, modified for the TensorFlow build system.
+# This currently builds C and C++ code.
+
+# This file is based on the CMakeLists.txt available from here:
+# https://github.com/grpc/grpc/blob/3bc78cd0b5bd784a235c01612d634b1ec5f8fb97/CMakeLists.txt
+# with modifications to remove dependencies on SSL, and to reuse
+# previously compiled libprotobuf.
+#
+# Copyright 2016, Google Inc.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+#     * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following disclaimer
+# in the documentation and/or other materials provided with the
+# distribution.
+#     * Neither the name of Google Inc. nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+cmake_minimum_required(VERSION 2.8)
+
+set(PACKAGE_NAME      "grpc")
+set(PACKAGE_VERSION   "1.0.0-pre2-tensorflow")
+set(PACKAGE_STRING    "${PACKAGE_NAME} ${PACKAGE_VERSION}")
+set(PACKAGE_TARNAME   "${PACKAGE_NAME}-${PACKAGE_VERSION}")
+set(PACKAGE_BUGREPORT "https://github.com/grpc/grpc/issues/")
+project(${PACKAGE_NAME} C CXX)
+
+set(CMAKE_POSITION_INDEPENDENT_CODE ON)
+set(CMAKE_C_FLAGS   "${CMAKE_C_FLAGS}   -std=c11")
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11")
+
+add_library(gpr
+  src/core/lib/profiling/basic_timers.c
+  src/core/lib/profiling/stap_timers.c
+  src/core/lib/support/alloc.c
+  src/core/lib/support/avl.c
+  src/core/lib/support/backoff.c
+  src/core/lib/support/cmdline.c
+  src/core/lib/support/cpu_iphone.c
+  src/core/lib/support/cpu_linux.c
+  src/core/lib/support/cpu_posix.c
+  src/core/lib/support/cpu_windows.c
+  src/core/lib/support/env_linux.c
+  src/core/lib/support/env_posix.c
+  src/core/lib/support/env_windows.c
+  src/core/lib/support/histogram.c
+  src/core/lib/support/host_port.c
+  src/core/lib/support/log.c
+  src/core/lib/support/log_android.c
+  src/core/lib/support/log_linux.c
+  src/core/lib/support/log_posix.c
+  src/core/lib/support/log_windows.c
+  src/core/lib/support/murmur_hash.c
+  src/core/lib/support/slice.c
+  src/core/lib/support/slice_buffer.c
+  src/core/lib/support/stack_lockfree.c
+  src/core/lib/support/string.c
+  src/core/lib/support/string_posix.c
+  src/core/lib/support/string_util_windows.c
+  src/core/lib/support/string_windows.c
+  src/core/lib/support/subprocess_posix.c
+  src/core/lib/support/subprocess_windows.c
+  src/core/lib/support/sync.c
+  src/core/lib/support/sync_posix.c
+  src/core/lib/support/sync_windows.c
+  src/core/lib/support/thd.c
+  src/core/lib/support/thd_posix.c
+  src/core/lib/support/thd_windows.c
+  src/core/lib/support/time.c
+  src/core/lib/support/time_posix.c
+  src/core/lib/support/time_precise.c
+  src/core/lib/support/time_windows.c
+  src/core/lib/support/tls_pthread.c
+  src/core/lib/support/tmpfile_msys.c
+  src/core/lib/support/tmpfile_posix.c
+  src/core/lib/support/tmpfile_windows.c
+  src/core/lib/support/wrap_memcpy.c
+)
+
+target_include_directories(gpr
+  PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}
+  PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include
+  PRIVATE ${PROTOBUF_INCLUDE_DIRS}
+)
+
+add_library(grpc_unsecure
+  src/core/lib/surface/init.c
+  src/core/lib/surface/init_unsecure.c
+  src/core/lib/channel/channel_args.c
+  src/core/lib/channel/channel_stack.c
+  src/core/lib/channel/channel_stack_builder.c
+  src/core/lib/channel/compress_filter.c
+  src/core/lib/channel/connected_channel.c
+  src/core/lib/channel/http_client_filter.c
+  src/core/lib/channel/http_server_filter.c
+  src/core/lib/compression/compression.c
+  src/core/lib/compression/message_compress.c
+  src/core/lib/debug/trace.c
+  src/core/lib/http/format_request.c
+  src/core/lib/http/httpcli.c
+  src/core/lib/http/parser.c
+  src/core/lib/iomgr/closure.c
+  src/core/lib/iomgr/endpoint.c
+  src/core/lib/iomgr/endpoint_pair_posix.c
+  src/core/lib/iomgr/endpoint_pair_windows.c
+  src/core/lib/iomgr/error.c
+  src/core/lib/iomgr/ev_epoll_linux.c
+  src/core/lib/iomgr/ev_poll_and_epoll_posix.c
+  src/core/lib/iomgr/ev_poll_posix.c
+  src/core/lib/iomgr/ev_posix.c
+  src/core/lib/iomgr/exec_ctx.c
+  src/core/lib/iomgr/executor.c
+  src/core/lib/iomgr/iocp_windows.c
+  src/core/lib/iomgr/iomgr.c
+  src/core/lib/iomgr/iomgr_posix.c
+  src/core/lib/iomgr/iomgr_windows.c
+  src/core/lib/iomgr/load_file.c
+  src/core/lib/iomgr/network_status_tracker.c
+  src/core/lib/iomgr/polling_entity.c
+  src/core/lib/iomgr/pollset_set_windows.c
+  src/core/lib/iomgr/pollset_windows.c
+  src/core/lib/iomgr/resolve_address_posix.c
+  src/core/lib/iomgr/resolve_address_windows.c
+  src/core/lib/iomgr/sockaddr_utils.c
+  src/core/lib/iomgr/socket_utils_common_posix.c
+  src/core/lib/iomgr/socket_utils_linux.c
+  src/core/lib/iomgr/socket_utils_posix.c
+  src/core/lib/iomgr/socket_windows.c
+  src/core/lib/iomgr/tcp_client_posix.c
+  src/core/lib/iomgr/tcp_client_windows.c
+  src/core/lib/iomgr/tcp_posix.c
+  src/core/lib/iomgr/tcp_server_posix.c
+  src/core/lib/iomgr/tcp_server_windows.c
+  src/core/lib/iomgr/tcp_windows.c
+  src/core/lib/iomgr/time_averaged_stats.c
+  src/core/lib/iomgr/timer.c
+  src/core/lib/iomgr/timer_heap.c
+  src/core/lib/iomgr/udp_server.c
+  src/core/lib/iomgr/unix_sockets_posix.c
+  src/core/lib/iomgr/unix_sockets_posix_noop.c
+  src/core/lib/iomgr/wakeup_fd_eventfd.c
+  src/core/lib/iomgr/wakeup_fd_nospecial.c
+  src/core/lib/iomgr/wakeup_fd_pipe.c
+  src/core/lib/iomgr/wakeup_fd_posix.c
+  src/core/lib/iomgr/workqueue_posix.c
+  src/core/lib/iomgr/workqueue_windows.c
+  src/core/lib/json/json.c
+  src/core/lib/json/json_reader.c
+  src/core/lib/json/json_string.c
+  src/core/lib/json/json_writer.c
+  src/core/lib/surface/alarm.c
+  src/core/lib/surface/api_trace.c
+  src/core/lib/surface/byte_buffer.c
+  src/core/lib/surface/byte_buffer_reader.c
+  src/core/lib/surface/call.c
+  src/core/lib/surface/call_details.c
+  src/core/lib/surface/call_log_batch.c
+  src/core/lib/surface/channel.c
+  src/core/lib/surface/channel_init.c
+  src/core/lib/surface/channel_ping.c
+  src/core/lib/surface/channel_stack_type.c
+  src/core/lib/surface/completion_queue.c
+  src/core/lib/surface/event_string.c
+  src/core/lib/surface/lame_client.c
+  src/core/lib/surface/metadata_array.c
+  src/core/lib/surface/server.c
+  src/core/lib/surface/validate_metadata.c
+  src/core/lib/surface/version.c
+  src/core/lib/transport/byte_stream.c
+  src/core/lib/transport/connectivity_state.c
+  src/core/lib/transport/metadata.c
+  src/core/lib/transport/metadata_batch.c
+  src/core/lib/transport/static_metadata.c
+  src/core/lib/transport/transport.c
+  src/core/lib/transport/transport_op_string.c
+  src/core/ext/transport/chttp2/server/insecure/server_chttp2.c
+  src/core/ext/transport/chttp2/server/insecure/server_chttp2_posix.c
+  src/core/ext/transport/chttp2/transport/bin_decoder.c
+  src/core/ext/transport/chttp2/transport/bin_encoder.c
+  src/core/ext/transport/chttp2/transport/chttp2_plugin.c
+  src/core/ext/transport/chttp2/transport/chttp2_transport.c
+  src/core/ext/transport/chttp2/transport/frame_data.c
+  src/core/ext/transport/chttp2/transport/frame_goaway.c
+  src/core/ext/transport/chttp2/transport/frame_ping.c
+  src/core/ext/transport/chttp2/transport/frame_rst_stream.c
+  src/core/ext/transport/chttp2/transport/frame_settings.c
+  src/core/ext/transport/chttp2/transport/frame_window_update.c
+  src/core/ext/transport/chttp2/transport/hpack_encoder.c
+  src/core/ext/transport/chttp2/transport/hpack_parser.c
+  src/core/ext/transport/chttp2/transport/hpack_table.c
+  src/core/ext/transport/chttp2/transport/huffsyms.c
+  src/core/ext/transport/chttp2/transport/incoming_metadata.c
+  src/core/ext/transport/chttp2/transport/parsing.c
+  src/core/ext/transport/chttp2/transport/status_conversion.c
+  src/core/ext/transport/chttp2/transport/stream_lists.c
+  src/core/ext/transport/chttp2/transport/stream_map.c
+  src/core/ext/transport/chttp2/transport/timeout_encoding.c
+  src/core/ext/transport/chttp2/transport/varint.c
+  src/core/ext/transport/chttp2/transport/writing.c
+  src/core/ext/transport/chttp2/alpn/alpn.c
+  src/core/ext/transport/chttp2/client/insecure/channel_create.c
+  src/core/ext/transport/chttp2/client/insecure/channel_create_posix.c
+  src/core/ext/client_config/channel_connectivity.c
+  src/core/ext/client_config/client_channel.c
+  src/core/ext/client_config/client_channel_factory.c
+  src/core/ext/client_config/client_config.c
+  src/core/ext/client_config/client_config_plugin.c
+  src/core/ext/client_config/connector.c
+  src/core/ext/client_config/default_initial_connect_string.c
+  src/core/ext/client_config/initial_connect_string.c
+  src/core/ext/client_config/lb_policy.c
+  src/core/ext/client_config/lb_policy_factory.c
+  src/core/ext/client_config/lb_policy_registry.c
+  src/core/ext/client_config/parse_address.c
+  src/core/ext/client_config/resolver.c
+  src/core/ext/client_config/resolver_factory.c
+  src/core/ext/client_config/resolver_registry.c
+  src/core/ext/client_config/subchannel.c
+  src/core/ext/client_config/subchannel_call_holder.c
+  src/core/ext/client_config/subchannel_index.c
+  src/core/ext/client_config/uri_parser.c
+  src/core/ext/resolver/dns/native/dns_resolver.c
+  src/core/ext/resolver/sockaddr/sockaddr_resolver.c
+  src/core/ext/load_reporting/load_reporting.c
+  src/core/ext/load_reporting/load_reporting_filter.c
+  src/core/ext/lb_policy/grpclb/load_balancer_api.c
+  src/core/ext/lb_policy/grpclb/proto/grpc/lb/v1/load_balancer.pb.c
+  third_party/nanopb/pb_common.c
+  third_party/nanopb/pb_decode.c
+  third_party/nanopb/pb_encode.c
+  src/core/ext/lb_policy/pick_first/pick_first.c
+  src/core/ext/lb_policy/round_robin/round_robin.c
+  src/core/ext/census/context.c
+  src/core/ext/census/gen/census.pb.c
+  src/core/ext/census/grpc_context.c
+  src/core/ext/census/grpc_filter.c
+  src/core/ext/census/grpc_plugin.c
+  src/core/ext/census/initialize.c
+  src/core/ext/census/mlog.c
+  src/core/ext/census/operation.c
+  src/core/ext/census/placeholders.c
+  src/core/ext/census/tracing.c
+  src/core/plugin_registry/grpc_unsecure_plugin_registry.c
+)
+
+target_include_directories(grpc_unsecure
+  PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}
+  PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include
+  PRIVATE ${PROTOBUF_INCLUDE_DIRS}
+)
+
+target_link_libraries(grpc_unsecure
+  gpr
+)
+
+add_library(grpc++_unsecure
+  src/cpp/common/insecure_create_auth_context.cc
+  src/cpp/client/channel.cc
+  src/cpp/client/client_context.cc
+  src/cpp/client/create_channel.cc
+  src/cpp/client/create_channel_internal.cc
+  src/cpp/client/create_channel_posix.cc
+  src/cpp/client/credentials.cc
+  src/cpp/client/generic_stub.cc
+  src/cpp/client/insecure_credentials.cc
+  src/cpp/common/channel_arguments.cc
+  src/cpp/common/completion_queue.cc
+  src/cpp/common/core_codegen.cc
+  src/cpp/common/rpc_method.cc
+  src/cpp/server/async_generic_service.cc
+  src/cpp/server/create_default_thread_pool.cc
+  src/cpp/server/dynamic_thread_pool.cc
+  src/cpp/server/insecure_server_credentials.cc
+  src/cpp/server/server.cc
+  src/cpp/server/server_builder.cc
+  src/cpp/server/server_context.cc
+  src/cpp/server/server_credentials.cc
+  src/cpp/server/server_posix.cc
+  src/cpp/util/byte_buffer.cc
+  src/cpp/util/slice.cc
+  src/cpp/util/status.cc
+  src/cpp/util/string_ref.cc
+  src/cpp/util/time.cc
+  src/cpp/codegen/codegen_init.cc
+)
+
+target_include_directories(grpc++_unsecure
+  PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}
+  PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include
+  PRIVATE ${PROTOBUF_INCLUDE_DIRS}
+)
+
+target_link_libraries(grpc++_unsecure
+  ${PROTOBUF_LIBRARIES}
+  gpr
+  grpc_unsecure
+)
diff --git a/tensorflow/contrib/cmake/tf_core_distributed_runtime.cmake b/tensorflow/contrib/cmake/tf_core_distributed_runtime.cmake
new file mode 100644
index 0000000..d52b18a
--- /dev/null
+++ b/tensorflow/contrib/cmake/tf_core_distributed_runtime.cmake
@@ -0,0 +1,98 @@
+########################################################
+# tf_core_distributed_runtime library
+########################################################
+file(GLOB_RECURSE tf_core_distributed_runtime_srcs
+   "${tensorflow_source_dir}/tensorflow/core/distributed_runtime/*.h"
+   "${tensorflow_source_dir}/tensorflow/core/distributed_runtime/*.cc"
+)
+
+file(GLOB_RECURSE tf_core_distributed_runtime_exclude_srcs
+    "${tensorflow_source_dir}/tensorflow/core/distributed_runtime/*test*.h"
+    "${tensorflow_source_dir}/tensorflow/core/distributed_runtime/*test*.cc"
+    "${tensorflow_source_dir}/tensorflow/core/distributed_runtime/rpc/grpc_tensorflow_server.cc"
+)
+
+list(REMOVE_ITEM tf_core_distributed_runtime_srcs ${tf_core_distributed_runtime_exclude_srcs})
+
+add_library(tf_core_distributed_runtime OBJECT ${tf_core_distributed_runtime_srcs})
+
+add_dependencies(tf_core_distributed_runtime
+    tf_core_cpu grpc
+    re2_copy_headers_to_destination
+)
+
+target_include_directories(tf_core_distributed_runtime PRIVATE
+   ${tensorflow_source_dir}
+   ${eigen_INCLUDE_DIRS}
+   ${GRPC_INCLUDE_DIRS}
+   ${re2_INCLUDE_DIR}
+)
+
+target_compile_options(tf_core_distributed_runtime PRIVATE
+   -fno-exceptions
+   -DEIGEN_AVOID_STL_ARRAY
+)
+
+# C++11
+target_compile_features(tf_core_distributed_runtime PRIVATE
+   cxx_rvalue_references
+)
+
+########################################################
+# grpc_tensorflow_server executable
+########################################################
+set(grpc_tensorflow_server_srcs
+    "${tensorflow_source_dir}/tensorflow/core/distributed_runtime/rpc/grpc_tensorflow_server.cc"
+)
+
+add_executable(grpc_tensorflow_server
+    ${grpc_tensorflow_server_srcs}
+    $<TARGET_OBJECTS:tf_core_lib>
+    $<TARGET_OBJECTS:tf_core_cpu>
+    $<TARGET_OBJECTS:tf_core_framework>
+    $<TARGET_OBJECTS:tf_core_kernels>
+    $<TARGET_OBJECTS:tf_cc_framework>
+    $<TARGET_OBJECTS:tf_cc_ops>
+    $<TARGET_OBJECTS:tf_core_ops>
+    $<TARGET_OBJECTS:tf_core_direct_session>
+    $<TARGET_OBJECTS:tf_core_distributed_runtime>
+)
+
+add_dependencies(tf_core_distributed_runtime
+    grpc
+    re2_copy_headers_to_destination
+)
+
+target_include_directories(grpc_tensorflow_server PUBLIC
+    ${tensorflow_source_dir}
+    ${eigen_INCLUDE_DIRS}
+    ${GRPC_INCLUDE_DIRS}
+)
+
+find_package(ZLIB REQUIRED)
+
+target_link_libraries(grpc_tensorflow_server PUBLIC
+    ${CMAKE_THREAD_LIBS_INIT}
+    ${PROTOBUF_LIBRARIES}
+    ${GRPC_LIBRARIES}
+    tf_protos_cc
+    re2_lib
+    ${boringssl_STATIC_LIBRARIES}
+    ${farmhash_STATIC_LIBRARIES}
+    ${gif_STATIC_LIBRARIES}
+    ${jpeg_STATIC_LIBRARIES}
+    ${jsoncpp_STATIC_LIBRARIES}
+    ${png_STATIC_LIBRARIES}
+    ${ZLIB_LIBRARIES}
+    ${CMAKE_DL_LIBS}
+)
+
+target_compile_options(grpc_tensorflow_server PRIVATE
+    -fno-exceptions
+    -DEIGEN_AVOID_STL_ARRAY
+)
+
+# C++11
+target_compile_features(grpc_tensorflow_server PRIVATE
+    cxx_rvalue_references
+)
diff --git a/tensorflow/contrib/cmake/tf_core_framework.cmake b/tensorflow/contrib/cmake/tf_core_framework.cmake
index cbcd11a..89ad766 100644
--- a/tensorflow/contrib/cmake/tf_core_framework.cmake
+++ b/tensorflow/contrib/cmake/tf_core_framework.cmake
@@ -24,8 +24,8 @@
       OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/${REL_DIR}/${FIL_WE}.pb.cc"
              "${CMAKE_CURRENT_BINARY_DIR}/${REL_DIR}/${FIL_WE}.pb.h"
       COMMAND  ${PROTOBUF_PROTOC_EXECUTABLE}
-      ARGS --cpp_out  ${CMAKE_CURRENT_BINARY_DIR} -I ${ROOT_DIR} ${ABS_FIL}
-      DEPENDS ${ABS_FIL} ${PROTOBUF_PROTOC_EXECUTABLE}
+      ARGS --cpp_out  ${CMAKE_CURRENT_BINARY_DIR} -I ${ROOT_DIR} ${ABS_FIL} -I ${PROTOBUF_INCLUDE_DIRS}
+      DEPENDS ${ABS_FIL} protobuf
       COMMENT "Running C++ protocol buffer compiler on ${FIL}"
       VERBATIM )
   endforeach()
@@ -71,13 +71,10 @@
 # tf_protos_cc library
 ########################################################
 
-# Build proto library
-include(FindProtobuf)
-find_package(Protobuf REQUIRED)
 include_directories(${PROTOBUF_INCLUDE_DIRS})
 include_directories(${CMAKE_CURRENT_BINARY_DIR})
 file(GLOB_RECURSE tf_protos_cc_srcs RELATIVE ${tensorflow_source_dir}
-    "${tensorflow_source_dir}/tensorflow/*.proto"
+    "${tensorflow_source_dir}/tensorflow/core/*.proto"
 )
 RELATIVE_PROTOBUF_GENERATE_CPP(PROTO_SRCS PROTO_HDRS
     ${tensorflow_source_dir} ${tf_protos_cc_srcs}
diff --git a/tensorflow/contrib/cmake/tf_core_ops.cmake b/tensorflow/contrib/cmake/tf_core_ops.cmake
index c6124b6..2aa03fa 100644
--- a/tensorflow/contrib/cmake/tf_core_ops.cmake
+++ b/tensorflow/contrib/cmake/tf_core_ops.cmake
@@ -16,6 +16,7 @@
     "attention_ops"
     "candidate_sampling_ops"
     "control_flow_ops"
+    "ctc_ops"
     "data_flow_ops"
     "image_ops"
     "io_ops"
diff --git a/tensorflow/contrib/cmake/tf_python.cmake b/tensorflow/contrib/cmake/tf_python.cmake
new file mode 100644
index 0000000..1efb59e
--- /dev/null
+++ b/tensorflow/contrib/cmake/tf_python.cmake
@@ -0,0 +1,393 @@
+# CMake rules for generating the TensorFlow Python bindings.
+#
+# Known limitations:
+# * Generates output in a hard-coded path ${CMAKE_CURRENT_BINARY_DIR}/tf_python.
+# * No support for dynamic library loading.
+# * No support for tf.contrib. (TODO(mrry): Add rules for building op libraries.)
+# * No support for Python 3. (TODO(mrry): Add override for FindPythonInterp.)
+#
+# The _pywrap_tensorflow target builds everything.
+
+########################################################
+# Resolve installed dependencies
+########################################################
+
+# 1. Resolve the installed version of SWIG.
+FIND_PACKAGE(SWIG REQUIRED)
+INCLUDE(${SWIG_USE_FILE})
+
+# 2. Resolve the installed version of Python (for Python.h and python).
+# TODO(mrry): Parameterize the build script to enable Python 3 building.
+include(FindPythonInterp)
+if(NOT PYTHON_INCLUDE_DIR)
+  set(PYTHON_NOT_FOUND false)
+  exec_program("${PYTHON_EXECUTABLE}"
+    ARGS "-c 'import distutils.sysconfig; print distutils.sysconfig.get_python_inc()'"
+    OUTPUT_VARIABLE PYTHON_INCLUDE_DIR
+    RETURN_VALUE PYTHON_NOT_FOUND)
+  message(${PYTHON_INCLUDE_DIR})
+  if(${PYTHON_NOT_FOUND})
+    message(FATAL_ERROR
+            "Cannot get Python include directory. Is distutils installed?")
+  endif(${PYTHON_NOT_FOUND})
+endif(NOT PYTHON_INCLUDE_DIR)
+FIND_PACKAGE(PythonLibs)
+
+# 3. Resolve the installed version of NumPy (for numpy/arrayobject.h).
+if(NOT NUMPY_INCLUDE_DIR)
+  set(NUMPY_NOT_FOUND false)
+  exec_program("${PYTHON_EXECUTABLE}"
+    ARGS "-c 'import numpy; print numpy.get_include()'"
+    OUTPUT_VARIABLE NUMPY_INCLUDE_DIR
+    RETURN_VALUE NUMPY_NOT_FOUND)
+  if(${NUMPY_NOT_FOUND})
+    message(FATAL_ERROR
+            "Cannot get NumPy include directory: Is NumPy installed?")
+  endif(${NUMPY_NOT_FOUND})
+endif(NOT NUMPY_INCLUDE_DIR)
+
+# 4. Resolve the installed version of zlib (for libz.so).
+find_package(ZLIB REQUIRED)
+
+
+########################################################
+# Build the Python directory structure.
+########################################################
+
+# TODO(mrry): Configure this to build in a directory other than tf_python/
+# TODO(mrry): Assemble the Python files into a PIP package.
+
+# tf_python_srcs contains all static .py files
+file(GLOB_RECURSE tf_python_srcs RELATIVE ${tensorflow_source_dir}
+    "${tensorflow_source_dir}/tensorflow/python/*.py"
+)
+list(APPEND tf_python_srcs "tensorflow/__init__.py")
+
+# tf_python_copy_scripts_to_destination copies all Python files
+# (including static source and generated protobuf wrappers, but *not*
+# generated TensorFlow op wrappers) into tf_python/.
+add_custom_target(tf_python_copy_scripts_to_destination)
+
+# Copy static files to tf_python/.
+foreach(script ${tf_python_srcs})
+  get_filename_component(REL_DIR ${script} DIRECTORY)
+    add_custom_command(TARGET tf_python_copy_scripts_to_destination PRE_BUILD
+    COMMAND ${CMAKE_COMMAND} -E copy ${tensorflow_source_dir}/${script} ${CMAKE_CURRENT_BINARY_DIR}/tf_python/${script})
+endforeach()
+
+# Generates the Python protobuf wrappers.
+# ROOT_DIR must be absolute; subsequent arguments are interpreted as
+# paths of .proto files, and must be relative to ROOT_DIR.
+function(RELATIVE_PROTOBUF_GENERATE_PYTHON ROOT_DIR)
+  if(NOT ARGN)
+    message(SEND_ERROR "Error: RELATIVE_PROTOBUF_GENERATE_PYTHON() called without any proto files")
+    return()
+  endif()
+  foreach(FIL ${ARGN})
+    set(ABS_FIL ${ROOT_DIR}/${FIL})
+    get_filename_component(FIL_WE ${FIL} NAME_WE)
+    get_filename_component(FIL_DIR ${ABS_FIL} PATH)
+    file(RELATIVE_PATH REL_DIR ${ROOT_DIR} ${FIL_DIR})
+    add_custom_command(
+      TARGET tf_python_copy_scripts_to_destination PRE_LINK
+      COMMAND  ${PROTOBUF_PROTOC_EXECUTABLE}
+      ARGS --python_out  ${CMAKE_CURRENT_BINARY_DIR}/tf_python/ -I ${ROOT_DIR} -I ${PROTOBUF_INCLUDE_DIRS} ${ABS_FIL} 
+      DEPENDS ${ABS_FIL} ${PROTOBUF_PROTOC_EXECUTABLE} protobuf
+      COMMENT "Running Python protocol buffer compiler on ${FIL}"
+      VERBATIM )
+  endforeach()
+endfunction()
+
+file(GLOB_RECURSE tf_protos_python_srcs RELATIVE ${tensorflow_source_dir}
+    "${tensorflow_source_dir}/tensorflow/core/*.proto"
+    "${tensorflow_source_dir}/tensorflow/python/*.proto"
+)
+RELATIVE_PROTOBUF_GENERATE_PYTHON(
+    ${tensorflow_source_dir} ${tf_protos_python_srcs}
+)
+
+# tf_python_touchup_modules adds empty __init__.py files to all
+# directories containing Python code, so that Python will recognize
+# them as modules.
+add_custom_target(tf_python_touchup_modules
+  DEPENDS tf_python_copy_scripts_to_destination)
+
+function(add_python_module MODULE_NAME)
+    add_custom_command(TARGET tf_python_touchup_modules PRE_BUILD
+        COMMAND ${CMAKE_COMMAND} -E make_directory "${CMAKE_CURRENT_BINARY_DIR}/tf_python/${MODULE_NAME}") 
+    add_custom_command(TARGET tf_python_touchup_modules PRE_BUILD
+        COMMAND ${CMAKE_COMMAND} -E touch "${CMAKE_CURRENT_BINARY_DIR}/tf_python/${MODULE_NAME}/__init__.py")
+endfunction()
+
+add_python_module("tensorflow")
+add_python_module("tensorflow/core")
+add_python_module("tensorflow/core/example")
+add_python_module("tensorflow/core/framework")
+add_python_module("tensorflow/core/lib")
+add_python_module("tensorflow/core/lib/core")
+add_python_module("tensorflow/core/protobuf")
+add_python_module("tensorflow/core/util")
+add_python_module("tensorflow/python")
+add_python_module("tensorflow/python/client")
+add_python_module("tensorflow/python/framework")
+add_python_module("tensorflow/python/ops")
+add_python_module("tensorflow/python/kernel_tests")
+add_python_module("tensorflow/python/lib")
+add_python_module("tensorflow/python/lib/core")
+add_python_module("tensorflow/python/lib/core/io")
+add_python_module("tensorflow/python/platform")
+add_python_module("tensorflow/python/platform/default")
+add_python_module("tensorflow/python/platform/summary")
+add_python_module("tensorflow/python/platform/summary/impl")
+add_python_module("tensorflow/python/tools")
+add_python_module("tensorflow/python/training")
+add_python_module("tensorflow/python/util")
+add_python_module("tensorflow/python/util/protobuf")
+add_python_module("tensorflow/contrib")
+add_python_module("tensorflow/contrib/bayesflow")
+add_python_module("tensorflow/contrib/bayesflow/python")
+add_python_module("tensorflow/contrib/bayesflow/python/ops")
+add_python_module("tensorflow/contrib/bayesflow/python/ops/bernoulli")
+add_python_module("tensorflow/contrib/framework")
+add_python_module("tensorflow/contrib/framework/python")
+add_python_module("tensorflow/contrib/framework/python/framework")
+add_python_module("tensorflow/contrib/layers")
+add_python_module("tensorflow/contrib/layers/python")
+add_python_module("tensorflow/contrib/layers/python/layers")
+add_python_module("tensorflow/contrib/layers/python/ops")
+
+
+
+########################################################
+# tf_python_op_gen_main library
+########################################################
+set(tf_python_op_gen_main_srcs
+    "${tensorflow_source_dir}/tensorflow/python/framework/python_op_gen.cc"
+    "${tensorflow_source_dir}/tensorflow/python/framework/python_op_gen_main.cc"
+    "${tensorflow_source_dir}/tensorflow/python/framework/python_op_gen.h"
+)
+
+add_library(tf_python_op_gen_main OBJECT ${tf_python_op_gen_main_srcs})
+
+add_dependencies(tf_python_op_gen_main tf_core_framework)
+
+target_include_directories(tf_python_op_gen_main PRIVATE
+    ${tensorflow_source_dir}
+    ${eigen_INCLUDE_DIRS}
+)
+
+target_compile_options(tf_python_op_gen_main PRIVATE
+    -fno-exceptions
+    -DEIGEN_AVOID_STL_ARRAY
+)
+
+# C++11
+target_compile_features(tf_python_op_gen_main PRIVATE
+    cxx_rvalue_references
+)
+
+# create directory for ops generated files
+set(python_ops_target_dir ${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/python/ops)
+
+set(tf_python_ops_generated_files)
+
+set(tf_python_op_lib_names
+    ${tf_op_lib_names}
+    "user_ops"
+)
+
+function(GENERATE_PYTHON_OP_LIB tf_python_op_lib_name)
+    set(oneValueArgs DESTINATION)
+    set(multiValueArgs ADDITIONAL_LIBRARIES)
+    cmake_parse_arguments(GENERATE_PYTHON_OP_LIB
+      "" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
+    if(NOT DEFINED GENERATE_PYTHON_OP_LIB_DESTINATION)
+      # Default destination is tf_python/tensorflow/python/ops/gen_<...>.py.
+      set(GENERATE_PYTHON_OP_LIB_DESTINATION
+          "${python_ops_target_dir}/gen_${tf_python_op_lib_name}.py")
+    endif()
+
+    # Create a C++ executable that links in the appropriate op
+    # registrations and generates Python wrapper code based on the
+    # registered ops.
+    add_executable(${tf_python_op_lib_name}_gen_python
+        $<TARGET_OBJECTS:tf_python_op_gen_main>
+        $<TARGET_OBJECTS:tf_${tf_python_op_lib_name}>
+        $<TARGET_OBJECTS:tf_core_lib>
+        $<TARGET_OBJECTS:tf_core_framework>
+	${GENERATE_PYTHON_OP_LIB_ADDITIONAL_LIBRARIES}
+    )
+    target_include_directories(${tf_python_op_lib_name}_gen_python PRIVATE
+        ${tensorflow_source_dir}
+        ${eigen_INCLUDE_DIRS}
+    )
+    target_link_libraries(${tf_python_op_lib_name}_gen_python PRIVATE
+        ${CMAKE_THREAD_LIBS_INIT}
+        ${PROTOBUF_LIBRARIES}
+        tf_protos_cc
+        re2_lib
+        ${gif_STATIC_LIBRARIES}
+	${jpeg_STATIC_LIBRARIES}
+        ${png_STATIC_LIBRARIES}
+        ${ZLIB_LIBRARIES}
+        ${jsoncpp_STATIC_LIBRARIES}
+        ${boringssl_STATIC_LIBRARIES}
+        ${CMAKE_DL_LIBS}
+    )
+    target_compile_options(${tf_python_op_lib_name}_gen_python PRIVATE
+        -fno-exceptions
+        -DEIGEN_AVOID_STL_ARRAY
+        -lm
+    )
+    # C++11
+    target_compile_features(${tf_python_op_lib_name}_gen_python PRIVATE
+        cxx_rvalue_references
+    )
+
+    # Use the generated C++ executable to create a Python file
+    # containing the wrappers.
+    add_custom_command(
+      OUTPUT ${GENERATE_PYTHON_OP_LIB_DESTINATION}
+      COMMAND ${tf_python_op_lib_name}_gen_python @${tensorflow_source_dir}/tensorflow/python/ops/hidden_ops.txt 1 > ${GENERATE_PYTHON_OP_LIB_DESTINATION}
+      DEPENDS ${tf_python_op_lib_name}_gen_python
+    )
+    
+    set(tf_python_ops_generated_files ${tf_python_ops_generated_files}
+        ${GENERATE_PYTHON_OP_LIB_DESTINATION} PARENT_SCOPE)
+endfunction()
+
+GENERATE_PYTHON_OP_LIB("array_ops")
+GENERATE_PYTHON_OP_LIB("math_ops")
+GENERATE_PYTHON_OP_LIB("functional_ops")
+GENERATE_PYTHON_OP_LIB("candidate_sampling_ops")
+GENERATE_PYTHON_OP_LIB("control_flow_ops"
+  ADDITIONAL_LIBRARIES $<TARGET_OBJECTS:tf_no_op>)
+GENERATE_PYTHON_OP_LIB("ctc_ops")
+GENERATE_PYTHON_OP_LIB("data_flow_ops")
+GENERATE_PYTHON_OP_LIB("image_ops")
+GENERATE_PYTHON_OP_LIB("io_ops")
+GENERATE_PYTHON_OP_LIB("linalg_ops")
+GENERATE_PYTHON_OP_LIB("logging_ops")
+GENERATE_PYTHON_OP_LIB("nn_ops")
+GENERATE_PYTHON_OP_LIB("parsing_ops")
+GENERATE_PYTHON_OP_LIB("random_ops")
+GENERATE_PYTHON_OP_LIB("script_ops")
+GENERATE_PYTHON_OP_LIB("state_ops")
+GENERATE_PYTHON_OP_LIB("sparse_ops")
+GENERATE_PYTHON_OP_LIB("string_ops")
+GENERATE_PYTHON_OP_LIB("user_ops")
+GENERATE_PYTHON_OP_LIB("training_ops"
+  DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/python/training/gen_training_ops.py)
+
+add_custom_target(tf_python_ops SOURCES ${tf_python_ops_generated_files})
+add_dependencies(tf_python_ops tf_python_op_gen_main)
+
+
+############################################################
+# Build the SWIG-wrapped library for the TensorFlow runtime.
+############################################################
+
+# python_deps is a shared library containing all of the TensorFlow
+# runtime and the standard ops and kernels. These are installed into
+# tf_python/tensorflow/python/.
+# TODO(mrry): Refactor this to expose a framework library that
+# facilitates `tf.load_op_library()`.
+add_library(python_deps SHARED
+    "${tensorflow_source_dir}/tensorflow/python/client/tf_session_helper.h"
+    "${tensorflow_source_dir}/tensorflow/python/client/tf_session_helper.cc"
+    "${tensorflow_source_dir}/tensorflow/python/framework/cpp_shape_inference.h"
+    "${tensorflow_source_dir}/tensorflow/python/framework/cpp_shape_inference.cc"
+    "${tensorflow_source_dir}/tensorflow/python/framework/python_op_gen.h"
+    "${tensorflow_source_dir}/tensorflow/python/framework/python_op_gen.cc"
+    "${tensorflow_source_dir}/tensorflow/python/lib/core/numpy.h"
+    "${tensorflow_source_dir}/tensorflow/python/lib/core/numpy.cc"
+    "${tensorflow_source_dir}/tensorflow/python/lib/core/py_func.h"
+    "${tensorflow_source_dir}/tensorflow/python/lib/core/py_func.cc"
+    "${tensorflow_source_dir}/tensorflow/python/lib/io/py_record_reader.h"
+    "${tensorflow_source_dir}/tensorflow/python/lib/io/py_record_reader.cc"
+    "${tensorflow_source_dir}/tensorflow/python/lib/io/py_record_writer.h"
+    "${tensorflow_source_dir}/tensorflow/python/lib/io/py_record_writer.cc"
+    "${tensorflow_source_dir}/tensorflow/c/c_api.cc"
+    "${tensorflow_source_dir}/tensorflow/c/c_api.h"
+    "${tensorflow_source_dir}/tensorflow/c/checkpoint_reader.cc"
+    "${tensorflow_source_dir}/tensorflow/c/checkpoint_reader.h"
+    "${tensorflow_source_dir}/tensorflow/c/tf_status_helper.cc"
+    "${tensorflow_source_dir}/tensorflow/c/tf_status_helper.h"
+    $<TARGET_OBJECTS:tf_core_lib>
+    $<TARGET_OBJECTS:tf_core_cpu>
+    $<TARGET_OBJECTS:tf_core_framework>
+    $<TARGET_OBJECTS:tf_core_ops>
+    $<TARGET_OBJECTS:tf_core_direct_session>
+    $<TARGET_OBJECTS:tf_core_distributed_runtime>
+    $<TARGET_OBJECTS:tf_core_kernels>
+)
+target_link_libraries(python_deps
+    ${CMAKE_THREAD_LIBS_INIT}
+    tf_protos_cc
+    ${GRPC_LIBRARIES}
+    ${PROTOBUF_LIBRARY}
+    re2_lib
+    ${boringssl_STATIC_LIBRARIES}
+    ${farmhash_STATIC_LIBRARIES}
+    ${gif_STATIC_LIBRARIES}
+    ${jpeg_STATIC_LIBRARIES}
+    ${jsoncpp_STATIC_LIBRARIES}
+    ${png_STATIC_LIBRARIES}
+    ${ZLIB_LIBRARIES}
+    ${CMAKE_DL_LIBS}
+)
+target_include_directories(python_deps PUBLIC
+    ${tensorflow_source_dir}
+    ${CMAKE_CURRENT_BINARY_DIR}
+    ${eigen_INCLUDE_DIRS}
+    ${PYTHON_INCLUDE_DIR}
+    ${NUMPY_INCLUDE_DIR}
+)
+# C++11
+target_compile_features(python_deps PRIVATE
+    cxx_rvalue_references
+)
+set_target_properties(python_deps PROPERTIES
+    LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/tf_python/tensorflow/python)
+
+# _pywrap_tensorflow is the target that generates the SWIG bindings
+# and compiles them as a shared library that depends on python_deps.
+set(CMAKE_SWIG_FLAGS "")
+set(CMAKE_SWIG_OUTDIR ${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/python)
+SET_SOURCE_FILES_PROPERTIES("${tensorflow_source_dir}/tensorflow/python/tensorflow.i"
+    PROPERTIES CPLUSPLUS ON
+)
+SET_PROPERTY(SOURCE "${tensorflow_source_dir}/tensorflow/python/tensorflow.i"
+    PROPERTY SWIG_FLAGS "-I\"${tensorflow_source_dir}\"" "-module" "pywrap_tensorflow"
+)
+SWIG_ADD_MODULE(pywrap_tensorflow python
+    "${tensorflow_source_dir}/tensorflow/python/tensorflow.i"
+)
+SWIG_LINK_LIBRARIES(pywrap_tensorflow
+    python_deps
+    ${PROTOBUF_LIBRARY}
+    ${CMAKE_DL_LIBS}
+)
+target_include_directories(_pywrap_tensorflow PUBLIC
+    ${tensorflow_source_dir}
+    ${CMAKE_CURRENT_BINARY_DIR}
+    ${eigen_INCLUDE_DIRS}
+    ${PYTHON_INCLUDE_DIR}
+    ${NUMPY_INCLUDE_DIR}
+)
+add_dependencies(_pywrap_tensorflow
+    eigen
+    tf_core_direct_session
+    tf_core_distributed_runtime
+    tf_core_framework
+    python_deps
+    tf_python_copy_scripts_to_destination
+    tf_python_ops
+    tf_python_touchup_modules
+)
+# C++11
+target_compile_features(_pywrap_tensorflow PRIVATE
+    cxx_rvalue_references
+)
+set_target_properties(_pywrap_tensorflow PROPERTIES
+    LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/tf_python/tensorflow/python)
diff --git a/tensorflow/contrib/cmake/tf_tools.cmake b/tensorflow/contrib/cmake/tf_tools.cmake
index 5022dfe..f8bbfb3 100644
--- a/tensorflow/contrib/cmake/tf_tools.cmake
+++ b/tensorflow/contrib/cmake/tf_tools.cmake
@@ -33,6 +33,7 @@
 
 add_dependencies(${proto_text}
     tf_core_lib
+    protobuf
     # jpeg_copy_headers_to_destination
     # png_copy_headers_to_destination
     # re2_copy_headers_to_destination
diff --git a/tensorflow/contrib/makefile/Makefile b/tensorflow/contrib/makefile/Makefile
index 5da8502..5922a44 100644
--- a/tensorflow/contrib/makefile/Makefile
+++ b/tensorflow/contrib/makefile/Makefile
@@ -434,6 +434,8 @@
 $(wildcard tensorflow/core/*/*/*main.cc) \
 $(wildcard tensorflow/core/graph/dot.*) \
 $(wildcard tensorflow/core/lib/gif/*) \
+$(wildcard tensorflow/core/lib/io/zlib*) \
+$(wildcard tensorflow/core/lib/io/record*) \
 $(wildcard tensorflow/core/lib/jpeg/*) \
 $(wildcard tensorflow/core/lib/png/*) \
 $(wildcard tensorflow/core/util/events_writer.*) \
diff --git a/tensorflow/contrib/makefile/proto_text_cc_files.txt b/tensorflow/contrib/makefile/proto_text_cc_files.txt
index 784fa84..648be42 100644
--- a/tensorflow/contrib/makefile/proto_text_cc_files.txt
+++ b/tensorflow/contrib/makefile/proto_text_cc_files.txt
@@ -31,11 +31,14 @@
 tensorflow/core/lib/io/table.cc
 tensorflow/core/lib/io/record_writer.cc
 tensorflow/core/lib/io/record_reader.cc
+tensorflow/core/lib/io/random_inputstream.cc
 tensorflow/core/lib/io/path.cc
 tensorflow/core/lib/io/match.cc
 tensorflow/core/lib/io/iterator.cc
+tensorflow/core/lib/io/inputstream_interface.cc
 tensorflow/core/lib/io/inputbuffer.cc
 tensorflow/core/lib/io/format.cc
+tensorflow/core/lib/io/buffered_inputstream.cc
 tensorflow/core/lib/io/block_builder.cc
 tensorflow/core/lib/io/block.cc
 tensorflow/core/lib/histogram/histogram.cc
diff --git a/tensorflow/contrib/quantization/tools/quantize_graph.py b/tensorflow/contrib/quantization/tools/quantize_graph.py
index 60cbca9..0a814da 100644
--- a/tensorflow/contrib/quantization/tools/quantize_graph.py
+++ b/tensorflow/contrib/quantization/tools/quantize_graph.py
@@ -66,12 +66,12 @@
 
 def print_input_nodes(current_node, nodes_map, indent, already_visited):
   print(" " * indent + current_node.op + ":" + current_node.name)
+  already_visited[current_node.name] = True
   for input_node_name in current_node.input:
     if input_node_name in already_visited:
       continue
     input_node = nodes_map[input_node_name]
     print_input_nodes(input_node, nodes_map, indent + 1, already_visited)
-  already_visited[current_node.name] = True
 
 
 def create_node(op, name, inputs):
@@ -350,13 +350,13 @@
 
   def round_nodes_recursively(self, current_node):
     """The entry point for simple rounding quantization."""
+    self.already_visited[current_node.name] = True
     for input_node_name in current_node.input:
       input_node_name = node_name_from_input(input_node_name)
       if input_node_name in self.already_visited:
         continue
       input_node = self.nodes_map[input_node_name]
       self.round_nodes_recursively(input_node)
-    self.already_visited[current_node.name] = True
     nodes_to_quantize = ["Conv2D", "BiasAdd", "MatMul"]
     if any(current_node.op in s for s in nodes_to_quantize):
       new_node = tf.NodeDef()
@@ -381,13 +381,13 @@
 
   def quantize_nodes_recursively(self, current_node):
     """The entry point for quantizing nodes to eight bit and back."""
+    self.already_visited[current_node.name] = True
     for input_node_name in current_node.input:
       input_node_name = node_name_from_input(input_node_name)
       if input_node_name in self.already_visited:
         continue
       input_node = self.nodes_map[input_node_name]
       self.quantize_nodes_recursively(input_node)
-    self.already_visited[current_node.name] = True
     nodes_to_quantize = ["Conv2D", "BiasAdd", "MatMul"]
     if any(current_node.op in s for s in nodes_to_quantize):
       for input_name in current_node.input:
@@ -448,13 +448,13 @@
 
   def eightbitize_nodes_recursively(self, current_node):
     """The entry point for transforming a graph into full eight bit."""
+    self.already_visited[current_node.name] = True
     for input_node_name in current_node.input:
       input_node_name = node_name_from_input(input_node_name)
       if input_node_name in self.already_visited:
         continue
       input_node = self.nodes_map[input_node_name]
       self.eightbitize_nodes_recursively(input_node)
-    self.already_visited[current_node.name] = True
     if current_node.op == "MatMul":
       self.eightbitize_mat_mul_node(current_node)
     elif current_node.op == "Conv2D":
diff --git a/tensorflow/core/distributed_runtime/graph_mgr.cc b/tensorflow/core/distributed_runtime/graph_mgr.cc
index 529881e..c603c3c 100644
--- a/tensorflow/core/distributed_runtime/graph_mgr.cc
+++ b/tensorflow/core/distributed_runtime/graph_mgr.cc
@@ -119,7 +119,7 @@
     mutex_lock l(mu_);
     return strings::StrCat(prefix, "_G", next_id_++);
   };
-  popts.get_incarnation = [this](const string& name) {
+  popts.get_incarnation = [this](const string& name) -> int64 {
     Device* device = nullptr;
     Status s = worker_env_->device_mgr->LookupDevice(name, &device);
     if (s.ok()) {
diff --git a/tensorflow/core/distributed_runtime/master_session.cc b/tensorflow/core/distributed_runtime/master_session.cc
index 372d4ca..5ce6856 100644
--- a/tensorflow/core/distributed_runtime/master_session.cc
+++ b/tensorflow/core/distributed_runtime/master_session.cc
@@ -939,7 +939,7 @@
     mutex_lock l(mu_);
     return strings::StrCat(prefix, "_S", next_node_id_++);
   };
-  popts.get_incarnation = [this](const string& name) {
+  popts.get_incarnation = [this](const string& name) -> int64 {
     Device* d = devices_.FindDeviceByName(name);
     if (d == nullptr) {
       return PartitionOptions::kIllegalIncarnation;
diff --git a/tensorflow/core/distributed_runtime/rpc/BUILD b/tensorflow/core/distributed_runtime/rpc/BUILD
index ae4fe03..b231f48 100644
--- a/tensorflow/core/distributed_runtime/rpc/BUILD
+++ b/tensorflow/core/distributed_runtime/rpc/BUILD
@@ -144,7 +144,7 @@
     ],
 )
 
-cc_library(
+tf_cuda_library(
     name = "grpc_worker_service",
     srcs = ["grpc_worker_service.cc"],
     hdrs = ["grpc_worker_service.h"],
diff --git a/tensorflow/core/distributed_runtime/rpc/grpc_call.h b/tensorflow/core/distributed_runtime/rpc/grpc_call.h
index 7062797..35f849c 100644
--- a/tensorflow/core/distributed_runtime/rpc/grpc_call.h
+++ b/tensorflow/core/distributed_runtime/rpc/grpc_call.h
@@ -247,7 +247,7 @@
 
   // Used as void* completion markers from grpc to indicate different
   // events of interest for a Call.
-  using typename UntypedCall<Service>::Tag;
+  typedef typename UntypedCall<Service>::Tag Tag;
   Tag request_received_tag_{this, Tag::kRequestReceived};
   Tag response_sent_tag_{this, Tag::kResponseSent};
   Tag cancelled_tag_{this, Tag::kCancelled};
diff --git a/tensorflow/core/distributed_runtime/rpc/grpc_remote_worker.cc b/tensorflow/core/distributed_runtime/rpc/grpc_remote_worker.cc
index 79d3b3e..60597aa 100644
--- a/tensorflow/core/distributed_runtime/rpc/grpc_remote_worker.cc
+++ b/tensorflow/core/distributed_runtime/rpc/grpc_remote_worker.cc
@@ -132,8 +132,8 @@
             // the RecvTensor response can not have been sent before
             // the RecvTensor request, and must have been sent before
             // it was received.
-            send_start_usec =
-                std::max(start_usec, response->metadata().send_start_micros());
+            send_start_usec = std::max(start_usec, static_cast<int64>(
+                response->metadata().send_start_micros()));
             send_start_usec = std::min(send_start_usec, end_usec - 1);
           }
           const string& key = request->rendezvous_key();
diff --git a/tensorflow/core/distributed_runtime/rpc/grpc_worker_service.cc b/tensorflow/core/distributed_runtime/rpc/grpc_worker_service.cc
index fdac89e..f51656d 100644
--- a/tensorflow/core/distributed_runtime/rpc/grpc_worker_service.cc
+++ b/tensorflow/core/distributed_runtime/rpc/grpc_worker_service.cc
@@ -23,7 +23,9 @@
 #include "tensorflow/core/common_runtime/device.h"
 #include "tensorflow/core/common_runtime/device_mgr.h"
 #include "tensorflow/core/common_runtime/dma_helper.h"
+#if GOOGLE_CUDA
 #include "tensorflow/core/common_runtime/gpu/gpu_util.h"
+#endif  // GOOGLE_CUDA
 #include "tensorflow/core/common_runtime/local_device.h"
 #include "tensorflow/core/common_runtime/process_util.h"
 #include "tensorflow/core/common_runtime/step_stats_collector.h"
@@ -429,10 +431,12 @@
             // device type*.
             // const size_t bytes = is_dead ? 0 : val.TotalBytes();
             const bool on_host = send_args.alloc_attrs.on_host();
-            const DeviceContext* send_dev_context = send_args.device_context;
             {
               // Non-DMA cases.
               if (src_dev->tensorflow_gpu_device_info() && (!on_host)) {
+#if GOOGLE_CUDA
+                const DeviceContext* send_dev_context =
+                    send_args.device_context;
                 RecvTensorResponse* tmp = new RecvTensorResponse;
                 tmp->set_is_dead(is_dead);
                 CHECK(send_dev_context)
@@ -460,6 +464,10 @@
                 GPUUtil::SetProtoFromGPU(val, src_dev, send_dev_context,
                                          tmp->mutable_tensor(), is_dead,
                                          response_ready);
+#else
+                call->SendResponse(ToGrpcStatus(
+                    errors::Internal("No GPU device in process")));
+#endif  // GOOGLE_CUDA
               } else {
                 grpc::EncodeTensorToByteBuffer(is_dead, val, &call->response);
                 call->SendResponse(ToGrpcStatus(Status::OK()));
diff --git a/tensorflow/core/distributed_runtime/tensor_coding.cc b/tensorflow/core/distributed_runtime/tensor_coding.cc
index 24c2608..49507dc 100644
--- a/tensorflow/core/distributed_runtime/tensor_coding.cc
+++ b/tensorflow/core/distributed_runtime/tensor_coding.cc
@@ -108,7 +108,7 @@
 }
 
 bool ReadVarintSizeAsInt(protobuf::io::CodedInputStream* input, int* result) {
-  uint64 v;
+  protobuf_uint64 v;
   if (input->ReadVarint64(&v) && v <= static_cast<uint64>(INT_MAX)) {
     *result = static_cast<int>(v);
     return true;
@@ -237,7 +237,7 @@
         break;
       }
       case RecvTensorResponse::kSendStartMicrosFieldNumber: {
-        uint64 v;
+        protobuf_uint64 v;
         if ((wt != WIRETYPE_VARINT) || !input.ReadVarint64(&v)) return false;
         meta_.set_send_start_micros(static_cast<int64>(v));
         break;
diff --git a/tensorflow/core/platform/default/build_config/BUILD b/tensorflow/core/platform/default/build_config/BUILD
index 158c42b..e2118c0 100644
--- a/tensorflow/core/platform/default/build_config/BUILD
+++ b/tensorflow/core/platform/default/build_config/BUILD
@@ -58,12 +58,12 @@
     copts = tf_copts(),
     deps = [
         "//tensorflow/core:protos_cc",
+        "@com_googlesource_code_re2//:re2",
         "@farmhash_archive//:farmhash",
         "@gif_archive//:gif",
         "@highwayhash//:sip_hash",
         "@jpeg_archive//:jpeg",
         "@png_archive//:png",
-        "@re2//:re2",
     ],
 )
 
diff --git a/tensorflow/examples/image_retraining/retrain.py b/tensorflow/examples/image_retraining/retrain.py
index acd3005..f226de0 100644
--- a/tensorflow/examples/image_retraining/retrain.py
+++ b/tensorflow/examples/image_retraining/retrain.py
@@ -268,7 +268,8 @@
     tf.logging.fatal('Category does not exist %s.', category)
   category_list = label_lists[category]
   if not category_list:
-    tf.logging.fatal('Category has no images - %s.', category)
+    tf.logging.fatal('Label %s has no images in the category %s.',
+                     label_name, category)
   mod_index = index % len(category_list)
   base_name = category_list[mod_index]
   sub_dir = label_lists['dir']
@@ -704,7 +705,7 @@
     tf.scalar_summary('mean/' + name, mean)
     with tf.name_scope('stddev'):
       stddev = tf.sqrt(tf.reduce_mean(tf.square(var - mean)))
-    tf.scalar_summary('sttdev/' + name, stddev)
+    tf.scalar_summary('stddev/' + name, stddev)
     tf.scalar_summary('max/' + name, tf.reduce_max(var))
     tf.scalar_summary('min/' + name, tf.reduce_min(var))
     tf.histogram_summary(name, var)
diff --git a/tensorflow/examples/learn/wide_n_deep_tutorial.py b/tensorflow/examples/learn/wide_n_deep_tutorial.py
index 5a23087..8530930 100644
--- a/tensorflow/examples/learn/wide_n_deep_tutorial.py
+++ b/tensorflow/examples/learn/wide_n_deep_tutorial.py
@@ -18,7 +18,7 @@
 from __future__ import print_function
 
 import tempfile
-import urllib
+from six.moves import urllib
 
 import pandas as pd
 import tensorflow as tf
@@ -56,7 +56,7 @@
     train_file_name = FLAGS.train_data
   else:
     train_file = tempfile.NamedTemporaryFile(delete=False)
-    urllib.urlretrieve("https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.data", train_file.name)  # pylint: disable=line-too-long
+    urllib.request.urlretrieve("https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.data", train_file.name)  # pylint: disable=line-too-long
     train_file_name = train_file.name
     train_file.close()
     print("Training data is downloaded to %s" % train_file_name)
@@ -65,7 +65,7 @@
     test_file_name = FLAGS.test_data
   else:
     test_file = tempfile.NamedTemporaryFile(delete=False)
-    urllib.urlretrieve("https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.test", test_file.name)  # pylint: disable=line-too-long
+    urllib.request.urlretrieve("https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.test", test_file.name)  # pylint: disable=line-too-long
     test_file_name = test_file.name
     test_file.close()
     print("Test data is downloaded to %s" % test_file_name)
diff --git a/tensorflow/examples/tutorials/deepdream/deepdream.ipynb b/tensorflow/examples/tutorials/deepdream/deepdream.ipynb
index 3f93904..d0dbd46 100644
--- a/tensorflow/examples/tutorials/deepdream/deepdream.ipynb
+++ b/tensorflow/examples/tutorials/deepdream/deepdream.ipynb
@@ -87,13 +87,13 @@
    "outputs": [],
    "source": [
     "# boilerplate code\n",
+    "from __future__ import print_function\n",
     "import os\n",
     "from io import BytesIO\n",
     "import numpy as np\n",
     "from functools import partial\n",
     "import PIL.Image\n",
     "from IPython.display import clear_output, Image, display, HTML\n",
-    "from __future__ import print_function\n",
     "\n",
     "import tensorflow as tf"
    ]
diff --git a/tensorflow/examples/tutorials/mnist/mnist_with_summaries.py b/tensorflow/examples/tutorials/mnist/mnist_with_summaries.py
index 868cfcc..70c9ad4 100644
--- a/tensorflow/examples/tutorials/mnist/mnist_with_summaries.py
+++ b/tensorflow/examples/tutorials/mnist/mnist_with_summaries.py
@@ -76,7 +76,7 @@
       tf.scalar_summary('mean/' + name, mean)
       with tf.name_scope('stddev'):
         stddev = tf.sqrt(tf.reduce_mean(tf.square(var - mean)))
-      tf.scalar_summary('sttdev/' + name, stddev)
+      tf.scalar_summary('stddev/' + name, stddev)
       tf.scalar_summary('max/' + name, tf.reduce_max(var))
       tf.scalar_summary('min/' + name, tf.reduce_min(var))
       tf.histogram_summary(name, var)
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.nn.conv2d_transpose.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.nn.conv2d_transpose.md
index 5b2a0bb..e60a31e 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.nn.conv2d_transpose.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.nn.conv2d_transpose.md
@@ -11,9 +11,9 @@
 
 
 *  <b>`value`</b>: A 4-D `Tensor` of type `float` and shape
-    `[batch, height, width, in_channels]`.
+    `[batch, in_height, in_width, in_channels]`.
 *  <b>`filter`</b>: A 4-D `Tensor` with the same type as `value` and shape
-    `[height, width, output_channels, in_channels]`.  `filter`'s
+    `[filter_height, filter_width, output_channels, in_channels]`.  `filter`'s
     `in_channels` dimension must match that of `value`.
 *  <b>`output_shape`</b>: A 1-D `Tensor` representing the output shape of the
     deconvolution op.
diff --git a/tensorflow/g3doc/get_started/os_setup.md b/tensorflow/g3doc/get_started/os_setup.md
index 6e20b3e..d172a41 100644
--- a/tensorflow/g3doc/get_started/os_setup.md
+++ b/tensorflow/g3doc/get_started/os_setup.md
@@ -845,20 +845,20 @@
 
 ```bash
 # Ubuntu/Linux 64-bit:
-$ pip install --upgrade https://storage.googleapis.com/tensorflow/linux/cpu/protobuf-3.0.0b2.post2-cp27-none-linux_x86_64.whl
+$ pip install --upgrade https://storage.googleapis.com/tensorflow/linux/cpu/protobuf-3.0.0-cp27-none-linux_x86_64.whl
 
 # Mac OS X:
-$ pip install --upgrade https://storage.googleapis.com/tensorflow/mac/protobuf-3.0.0b2.post2-cp27-none-any.whl
+$ pip install --upgrade https://storage.googleapis.com/tensorflow/mac/cpu/protobuf-3.0.0-cp27-cp27m-macosx_10_11_x86_64.whl
 ```
 
 and for Python 3 :
 
 ```bash
 # Ubuntu/Linux 64-bit:
-$ pip3 install --upgrade https://storage.googleapis.com/tensorflow/linux/cpu/protobuf-3.0.0b2.post2-cp34-none-linux_x86_64.whl
+$ pip3 install --upgrade https://storage.googleapis.com/tensorflow/linux/cpu/protobuf-3.0.0-cp3-none-linux_x86_64.whl
 
 # Mac OS X:
-$ pip3 install --upgrade https://storage.googleapis.com/tensorflow/mac/protobuf-3.0.0b2.post2-cp35-none-any.whl
+$ pip3 install --upgrade https://storage.googleapis.com/tensorflow/mac/cpu/protobuf-3.0.0-cp3-cp3m-macosx_10_11_x86_64.whl
 ```
 
 Install the above package _after_ you have installed TensorFlow via pip, as the
diff --git a/tensorflow/g3doc/how_tos/adding_an_op/index.md b/tensorflow/g3doc/how_tos/adding_an_op/index.md
index b3cab40..99787ac 100644
--- a/tensorflow/g3doc/how_tos/adding_an_op/index.md
+++ b/tensorflow/g3doc/how_tos/adding_an_op/index.md
@@ -139,7 +139,7 @@
 ```bash
 TF_INC=$(python -c 'import tensorflow as tf; print(tf.sysconfig.get_include())')
 
-g++ -std=c++11 -shared zero_out.cc -o zero_out.so -fPIC -I $TF_INC
+g++ -std=c++11 -shared zero_out.cc -o zero_out.so -fPIC -I $TF_INC -O2
 ```
 
 On Mac OS X, the additional flag "-undefined dynamic_lookup" is required when
diff --git a/tensorflow/g3doc/how_tos/summaries_and_tensorboard/index.md b/tensorflow/g3doc/how_tos/summaries_and_tensorboard/index.md
index e487d8f..69b6c26 100644
--- a/tensorflow/g3doc/how_tos/summaries_and_tensorboard/index.md
+++ b/tensorflow/g3doc/how_tos/summaries_and_tensorboard/index.md
@@ -9,7 +9,7 @@
 TensorBoard is fully configured, it looks like this:
 
 [![MNIST TensorBoard](../../images/mnist_tensorboard.png "MNIST TensorBoard")](http://tensorflow.org/tensorboard)  
-[*Click try a TensorBoard with data from this tutorial!*](http://tensorflow.org/tensorboard)
+[*Click to try TensorBoard with data from this tutorial!*](http://tensorflow.org/tensorboard)
 
 This tutorial is intended to get you started with simple TensorBoard usage.
 There are other resources available as well! The [TensorBoard README](https://www.tensorflow.org/code/tensorflow/tensorboard/README.md)
@@ -87,7 +87,7 @@
     tf.scalar_summary('mean/' + name, mean)
     with tf.name_scope('stddev'):
       stddev = tf.sqrt(tf.reduce_mean(tf.square(var - mean)))
-    tf.scalar_summary('sttdev/' + name, stddev)
+    tf.scalar_summary('stddev/' + name, stddev)
     tf.scalar_summary('max/' + name, tf.reduce_max(var))
     tf.scalar_summary('min/' + name, tf.reduce_min(var))
     tf.histogram_summary(name, var)
@@ -182,7 +182,8 @@
 
 ## Launching TensorBoard
 
-To run TensorBoard, use the command
+To run TensorBoard, use the following command (alternatively `python -m
+tensorflow.tensorboard`)
 
 ```bash
 tensorboard --logdir=path/to/log-directory
diff --git a/tensorflow/g3doc/how_tos/tool_developers/index.md b/tensorflow/g3doc/how_tos/tool_developers/index.md
index 94eb182..271e38e 100644
--- a/tensorflow/g3doc/how_tos/tool_developers/index.md
+++ b/tensorflow/g3doc/how_tos/tool_developers/index.md
@@ -56,7 +56,7 @@
 
 ```python
   if FLAGS.input_binary:
-    graph_def.ParseFromString(f.read)
+    graph_def.ParseFromString(f.read())
   else:
     text_format.Merge(f.read(), graph_def)
 ```
diff --git a/tensorflow/g3doc/resources/dims_types.md b/tensorflow/g3doc/resources/dims_types.md
index 7611019..3fedbbc 100644
--- a/tensorflow/g3doc/resources/dims_types.md
+++ b/tensorflow/g3doc/resources/dims_types.md
@@ -59,6 +59,7 @@
 `DT_INT32` | `tf.int32` | 32 bits signed integer.
 `DT_INT64` | `tf.int64` | 64 bits signed integer.
 `DT_UINT8` | `tf.uint8` | 8 bits unsigned integer.
+`DT_UINT16` | `tf.uint16` | 16 bits unsigned integer.
 `DT_STRING` | `tf.string` | Variable length byte arrays.  Each element of a Tensor is a byte array.
 `DT_BOOL` | `tf.bool` | Boolean.
 `DT_COMPLEX64` | `tf.complex64` | Complex number made of two 32 bits floating points: real and imaginary parts.
diff --git a/tensorflow/g3doc/tutorials/recurrent/index.md b/tensorflow/g3doc/tutorials/recurrent/index.md
index 82b159c..3ab8061 100644
--- a/tensorflow/g3doc/tutorials/recurrent/index.md
+++ b/tensorflow/g3doc/tutorials/recurrent/index.md
@@ -61,7 +61,7 @@
 lstm = rnn_cell.BasicLSTMCell(lstm_size)
 # Initial state of the LSTM memory.
 state = tf.zeros([batch_size, lstm.state_size])
-
+probabilities = []
 loss = 0.0
 for current_batch_of_words in words_in_dataset:
     # The value of state is updated after processing each batch of words.
@@ -69,7 +69,7 @@
 
     # The LSTM output can be used to make next word predictions
     logits = tf.matmul(output, softmax_w) + softmax_b
-    probabilities = tf.nn.softmax(logits)
+    probabilities.append(tf.nn.softmax(logits))
     loss += loss_function(probabilities, target_words)
 ```
 
diff --git a/tensorflow/python/kernel_tests/py_func_test.py b/tensorflow/python/kernel_tests/py_func_test.py
index 3d23e4a..f314712 100644
--- a/tensorflow/python/kernel_tests/py_func_test.py
+++ b/tensorflow/python/kernel_tests/py_func_test.py
@@ -34,6 +34,13 @@
     def my_func(x, y):
       return np.sinh(x) + np.cosh(y)
 
+    # single type
+    with self.test_session():
+      x = tf.constant(1.0, tf.float32)
+      y = tf.constant(2.0, tf.float32)
+      z = tf.py_func(my_func, [x, y], tf.float32)
+      self.assertEqual(z.eval(), my_func(1.0, 2.0).astype(np.float32))
+
     # scalar
     with self.test_session():
       x = tf.constant(1.0, tf.float32)
diff --git a/tensorflow/python/kernel_tests/reduction_ops_test.py b/tensorflow/python/kernel_tests/reduction_ops_test.py
index a8023f2..7e250e1 100644
--- a/tensorflow/python/kernel_tests/reduction_ops_test.py
+++ b/tensorflow/python/kernel_tests/reduction_ops_test.py
@@ -260,6 +260,9 @@
   def testGradient4(self):
     self._compareGradient([2, 3, 4, 2], [], None)
 
+  def testGradient5(self):
+    self._compareGradient([2, 3, 4, 2], [3, 4, 2], 0)
+
   def testHighRank(self):
     # Do a bunch of random high dimensional reductions
     np.random.seed(42)
@@ -380,6 +383,15 @@
           t, s, su, [2, 3, 4, 2], x_init_value=x, delta=1)
       self.assertAllClose(jacob_t, jacob_n, rtol=1e-3, atol=1e-3)
 
+      su = tf.reduce_mean(t, 0)
+      jacob_t, jacob_n = tf.test.compute_gradient(t,
+                                                  s,
+                                                  su,
+                                                  [3, 4, 2],
+                                                  x_init_value=x,
+                                                  delta=1)
+      self.assertAllClose(jacob_t, jacob_n, rtol=1e-3, atol=1e-3)
+
   def testEmptyGradients(self):
     with self.test_session():
       x = tf.zeros([0, 3])
@@ -459,6 +471,15 @@
           t, x.shape, su, [1], x_init_value=x, delta=1)
       self.assertAllClose(jacob_t, jacob_n, rtol=1e-3, atol=1e-3)
 
+      su = tf.reduce_prod(t, 0)
+      jacob_t, jacob_n = tf.test.compute_gradient(t,
+                                                  x.shape,
+                                                  su,
+                                                  [3, 4, 2],
+                                                  x_init_value=x,
+                                                  delta=1)
+      self.assertAllClose(jacob_t, jacob_n, rtol=1e-3, atol=1e-3)
+
   def testGradientWithZeros(self):
     s = [2, 3, 4, 2]
     x = np.arange(1.0, 49.0).reshape(s).astype(np.float32) / 20.
diff --git a/tensorflow/python/ops/math_grad.py b/tensorflow/python/ops/math_grad.py
index 51e42d9..7483d70 100644
--- a/tensorflow/python/ops/math_grad.py
+++ b/tensorflow/python/ops/math_grad.py
@@ -110,6 +110,8 @@
   # cumprod operations.
 
   input_shape = array_ops.shape(op.inputs[0])
+  # Reshape reduction indices for the case where the parameter is a scalar
+  reduction_indices = array_ops.reshape(op.inputs[1], [-1])
 
   # Expand grad to full input shape
   output_shape_kept_dims = math_ops.reduced_shape(input_shape, op.inputs[1])
@@ -122,7 +124,7 @@
   # so we need to cast here.  We put all the shape-related ops on CPU to avoid
   # copying back and forth, and since listdiff is CPU only.
   with ops.device("/cpu:0"):
-    reduced = math_ops.cast(op.inputs[1], dtypes.int32)
+    reduced = math_ops.cast(reduction_indices, dtypes.int32)
     idx = math_ops.range(0, array_ops.rank(op.inputs[0]))
     other, _ = array_ops.listdiff(idx, reduced)
     perm = array_ops.concat(0, [reduced, other])
diff --git a/tensorflow/python/ops/nn.py b/tensorflow/python/ops/nn.py
index 3c3c0cc..cc157aa 100644
--- a/tensorflow/python/ops/nn.py
+++ b/tensorflow/python/ops/nn.py
@@ -111,6 +111,7 @@
 @@conv2d_transpose
 @@conv1d
 @@conv3d
+@@conv3d_transpose
 
 ## Pooling
 
diff --git a/tensorflow/python/ops/nn_ops.py b/tensorflow/python/ops/nn_ops.py
index 20ef9f4..4ccb320 100644
--- a/tensorflow/python/ops/nn_ops.py
+++ b/tensorflow/python/ops/nn_ops.py
@@ -751,8 +751,8 @@
     if logits.get_shape().ndims is not None and (
         labels_static_shape.ndims is not None and
         labels_static_shape.ndims != logits.get_shape().ndims - 1):
-      raise ValueError("Rank mismatch: Labels rank (received %s) should equal "
-                       "logits rank (received %s) - 1." %
+      raise ValueError("Rank mismatch: Rank of labels (received %s) should equal "
+                       "rank of logits minus 1 (received %s)." %
                        (labels_static_shape.ndims, logits.get_shape().ndims))
     # Check if no reshapes are required.
     if logits.get_shape().ndims == 2:
diff --git a/tensorflow/python/ops/script_ops.py b/tensorflow/python/ops/script_ops.py
index 050dcd1..ce5129e 100644
--- a/tensorflow/python/ops/script_ops.py
+++ b/tensorflow/python/ops/script_ops.py
@@ -132,8 +132,8 @@
   Args:
     func: A python function.
     inp: A list of `Tensor`.
-    Tout: A list of tensorflow data types indicating what `func`
-          returns.
+    Tout: A list of tensorflow data types or a single tensorflow data type
+          indicating what `func` returns.
     stateful: A boolean indicating whether the function should be considered
               stateful or stateless. I.e. whether it, given the same input, will
               return the same output and at the same time does not change state
@@ -142,7 +142,7 @@
     name: A name for the operation (optional).
 
   Returns:
-    A list of `Tensor` which `func` computes.
+    A list of `Tensor` or a single `Tensor` which `func` computes.
   """
   token = _py_funcs.insert(func)
   # We tie the registered function's life-time with the current
@@ -162,14 +162,20 @@
   # the funcs registry.
   g._cleanup_py_funcs_used_in_graph.append(cleanup)
 
+  if isinstance(Tout, list):
+    is_list = True
+  else:
+    Tout = [Tout]
+    is_list = False
   if stateful:
-    return gen_script_ops._py_func(input=inp, token=token, Tout=Tout, name=name)
+    result = gen_script_ops._py_func(
+            input=inp, token=token, Tout=Tout, name=name)
     # pylint: enable=protected-access
   else:
-    return gen_script_ops._py_func_stateless(
-        input=inp, token=token, Tout=Tout,
-        name=name)
+    result = gen_script_ops._py_func_stateless(
+        input=inp, token=token, Tout=Tout, name=name)
     # pylint: enable=protected-access
+  return result if is_list else result[0]
 
 
 ops.RegisterShape("PyFunc")(common_shapes.call_cpp_shape_fn)
diff --git a/tensorflow/python/ops/seq2seq.py b/tensorflow/python/ops/seq2seq.py
index f96e00a..7a4b547 100644
--- a/tensorflow/python/ops/seq2seq.py
+++ b/tensorflow/python/ops/seq2seq.py
@@ -249,8 +249,11 @@
 
   Returns:
     A tuple of the form (outputs, state), where:
-      outputs: A list of the same length as decoder_inputs of 2D Tensors with
-        shape [batch_size x output_size] containing the generated outputs.
+      outputs: A list of the same length as decoder_inputs of 2D Tensors. The
+        output is of shape [batch_size x cell.output_size] when
+        output_projection is not None (and represents the dense representation
+        of predicted tokens). It is of shape [batch_size x num_decoder_symbols]
+        when output_projection is None.
       state: The state of each decoder cell in each time-step. This is a list
         with length len(decoder_inputs) -- one item for each time-step.
         It is a 2D Tensor of shape [batch_size x cell.state_size].
@@ -318,9 +321,11 @@
 
   Returns:
     A tuple of the form (outputs, state), where:
-      outputs: A list of the same length as decoder_inputs of 2D Tensors with
-        shape [batch_size x num_decoder_symbols] containing the generated
-        outputs.
+      outputs: A list of the same length as decoder_inputs of 2D Tensors. The
+        output is of shape [batch_size x cell.output_size] when
+        output_projection is not None (and represents the dense representation
+        of predicted tokens). It is of shape [batch_size x num_decoder_symbols]
+        when output_projection is None.
       state: The state of each decoder cell in each time-step. This is a list
         with length len(decoder_inputs) -- one item for each time-step.
         It is a 2D Tensor of shape [batch_size x cell.state_size].
@@ -1082,7 +1087,9 @@
   Returns:
     A tuple of the form (outputs, losses), where:
       outputs: The outputs for each bucket. Its j'th element consists of a list
-        of 2D Tensors of shape [batch_size x num_decoder_symbols] (jth outputs).
+        of 2D Tensors. The shape of output tensors can be either
+        [batch_size x output_size] or [batch_size x num_decoder_symbols]
+        depending on the seq2seq model used.
       losses: List of scalar Tensors, representing losses for each bucket, or,
         if per_example_loss is set, a list of 1D batch-sized float Tensors.
 
diff --git a/tensorflow/python/training/optimizer.py b/tensorflow/python/training/optimizer.py
index deeec2f..3990c04 100644
--- a/tensorflow/python/training/optimizer.py
+++ b/tensorflow/python/training/optimizer.py
@@ -282,26 +282,37 @@
     # This is a default implementation of apply_gradients() that can be shared
     # by most optimizers.  It relies on the subclass implementing the following
     # methods: _create_slots(), _prepare(), _apply_dense(), and _apply_sparse().
+
     grads_and_vars = tuple(grads_and_vars)  # Make sure repeat iteration works
+    converted_grads_and_vars = []
     for g, v in grads_and_vars:
+      if g is not None:
+        try:
+          # Convert the grad to Tensor or IndexedSlices if necessary
+          g = ops.convert_to_tensor_or_indexed_slices(g)
+        except TypeError:
+          raise TypeError(
+              "Gradient must be convertible to a Tensor or IndexedSlices, or None: %s" %g)  
       if not isinstance(g, (ops.Tensor, ops.IndexedSlices, type(None))):
         raise TypeError(
             "Gradient must be a Tensor, IndexedSlices, or None: %s" % g)
       if not isinstance(v, variables.Variable):
         raise TypeError(
             "Variable must be a tf.Variable: %s" % v)
-      if g is not None:
-        self._assert_valid_dtypes([g, v])
-    var_list = [v for g, v in grads_and_vars if g is not None]
+
+      converted_grads_and_vars.append((g,v))
+    
+    converted_grads_and_vars = tuple(converted_grads_and_vars)
+    var_list = [v for g, v in converted_grads_and_vars if g is not None]
     if not var_list:
       raise ValueError("No gradients provided for any variable: %s" %
-                       (grads_and_vars,))
+                       (converted_grads_and_vars,))
     with ops.control_dependencies(None):
       self._create_slots(var_list)
     update_ops = []
     with ops.name_scope(name, self._name) as name:
       self._prepare()
-      for grad, var in grads_and_vars:
+      for grad, var in converted_grads_and_vars:
         if grad is None:
           continue
         # We colocate all ops created in _apply_dense or _apply_sparse
diff --git a/tensorflow/python/training/optimizer_test.py b/tensorflow/python/training/optimizer_test.py
index 13e8cb9..ab4eecf 100644
--- a/tensorflow/python/training/optimizer_test.py
+++ b/tensorflow/python/training/optimizer_test.py
@@ -113,6 +113,33 @@
           # var1 has no gradient
           sgd_op.minimize(cost, global_step, [var1])
 
+  def testGradientsAsVariables(self):
+    for dtype in [tf.half, tf.float32, tf.float64]:
+      with self.test_session() as sess:
+        var0 = tf.Variable([1.0, 2.0], dtype=dtype)
+        var1 = tf.Variable([3.0, 4.0], dtype=dtype)
+        cost = 5 * var0 + 3 * var1
+        global_step = tf.Variable(tf.zeros([], tf.int64), name='global_step')
+        sgd_op = tf.train.GradientDescentOptimizer(3.0)
+        grads_and_vars = sgd_op.compute_gradients(cost, [var0, var1])
+        # Convert gradients to tf.Variables
+        converted_grads = [tf.Variable(tf.zeros([2], dtype)) for i in grads_and_vars]
+        convert_ops = [tf.assign(converted_grads[i], gv[0]) for i,gv in enumerate(grads_and_vars)]
+        
+        converted_grads_and_vars = list(zip(converted_grads, [var0, var1]))
+        opt_op = sgd_op.apply_gradients(converted_grads_and_vars, global_step)
+
+        tf.initialize_all_variables().run()
+        # Run convert_ops to convert the gradients to tf.Variables
+        sess.run(convert_ops)
+        # Fetch params to validate initial values
+        self.assertAllClose([1.0, 2.0], var0.eval())
+        self.assertAllClose([3.0, 4.0], var1.eval())
+        # Run 1 step of sgd through optimizer
+        opt_op.run()
+        # Validate updated params
+        self.assertAllClose([-14., -13.], var0.eval())
+        self.assertAllClose([-6., -5.], var1.eval()) 
 
 if __name__ == '__main__':
   tf.test.main()
diff --git a/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc b/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc
index 0277a7d..52256a7 100644
--- a/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc
+++ b/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc
@@ -925,8 +925,10 @@
       LOG(INFO) << "successful NUMA node read from SysFS had negative value ("
                 << value << "), but there must be at least one NUMA node"
                             ", so returning NUMA node zero";
+      fclose(file);
       return 0;
     }
+    fclose(file);
     return value;
   }
 
@@ -934,6 +936,7 @@
       << "could not convert SysFS file contents to integral NUMA node value: "
       << content;
 
+  fclose(file);
   return kUnknownNumaNode;
 #endif
 }
diff --git a/tensorflow/stream_executor/dso_loader.cc b/tensorflow/stream_executor/dso_loader.cc
index 83f2cad..4a96b04 100644
--- a/tensorflow/stream_executor/dso_loader.cc
+++ b/tensorflow/stream_executor/dso_loader.cc
@@ -31,6 +31,7 @@
 #include "tensorflow/core/platform/load_library.h"
 #include "tensorflow/stream_executor/lib/error.h"
 #include "tensorflow/stream_executor/lib/str_util.h"
+#include "tensorflow/stream_executor/lib/str_util.h"
 #include "tensorflow/stream_executor/lib/strcat.h"
 #include "tensorflow/stream_executor/lib/stringprintf.h"
 #include "tensorflow/stream_executor/platform/logging.h"
diff --git a/tensorflow/tensorboard/BUILD b/tensorflow/tensorboard/BUILD
index 64ffb75..e434a7c 100644
--- a/tensorflow/tensorboard/BUILD
+++ b/tensorflow/tensorboard/BUILD
@@ -21,7 +21,10 @@
 
 py_binary(
     name = "tensorboard",
-    srcs = ["tensorboard.py"],
+    srcs = [
+        "__main__.py",
+        "tensorboard.py",
+    ],
     data = [":frontend"],
     srcs_version = "PY2AND3",
     deps = [
diff --git a/tensorflow/tensorboard/__main__.py b/tensorflow/tensorboard/__main__.py
new file mode 100644
index 0000000..1871409
--- /dev/null
+++ b/tensorflow/tensorboard/__main__.py
@@ -0,0 +1,25 @@
+# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import sys
+
+from tensorflow.tensorboard.tensorboard import main
+
+if __name__ == '__main__':
+    sys.exit(main())
diff --git a/tensorflow/tensorboard/tensorboard.py b/tensorflow/tensorboard/tensorboard.py
index 273065e..39e000d 100644
--- a/tensorflow/tensorboard/tensorboard.py
+++ b/tensorflow/tensorboard/tensorboard.py
@@ -140,7 +140,11 @@
   print('Starting TensorBoard %s on port %d' % (tag, FLAGS.port))
 
   if FLAGS.host == "0.0.0.0":
-    print('(You can navigate to http://%s:%d)' % (socket.gethostbyname(socket.gethostname()), FLAGS.port))
+    try:
+      host = socket.gethostbyname(socket.gethostname())
+      print('(You can navigate to http://%s:%d)' % (host, FLAGS.port))
+    except socket.gaierror:
+      pass
   else:
     print('(You can navigate to http://%s:%d)' % (FLAGS.host, FLAGS.port))
 
diff --git a/tensorflow/tools/ci_build/Dockerfile.cmake b/tensorflow/tools/ci_build/Dockerfile.cmake
index 4b2e7d3..8a28fe6 100644
--- a/tensorflow/tools/ci_build/Dockerfile.cmake
+++ b/tensorflow/tools/ci_build/Dockerfile.cmake
@@ -6,7 +6,6 @@
 COPY install/*.sh /install/
 RUN /install/install_bootstrap_deb_packages.sh
 RUN /install/install_deb_packages.sh
-RUN /install/install_proto3_from_source.sh
 
 RUN pip install --upgrade numpy
 
diff --git a/tensorflow/tools/docker/Dockerfile b/tensorflow/tools/docker/Dockerfile
index 3bdebd6..d8a7839 100644
--- a/tensorflow/tools/docker/Dockerfile
+++ b/tensorflow/tools/docker/Dockerfile
@@ -12,10 +12,8 @@
         pkg-config \
         python \
         python-dev \
-        python-numpy \
-        python-pip \
-        python-scipy \
         rsync \
+        software-properties-common \
         unzip \
         && \
     apt-get clean && \
@@ -29,6 +27,8 @@
         ipykernel \
         jupyter \
         matplotlib \
+        numpy \
+        scipy \
         && \
     python -m ipykernel.kernelspec
 
diff --git a/tensorflow/tools/docker/Dockerfile.devel b/tensorflow/tools/docker/Dockerfile.devel
index 4f994bd..69868b2 100644
--- a/tensorflow/tools/docker/Dockerfile.devel
+++ b/tensorflow/tools/docker/Dockerfile.devel
@@ -11,8 +11,6 @@
         libzmq3-dev \
         pkg-config \
         python-dev \
-        python-numpy \
-        python-pip \
         rsync \
         software-properties-common \
         swig \
@@ -31,6 +29,7 @@
         ipykernel \
         jupyter \
         matplotlib \
+        numpy \
         && \
     python -m ipykernel.kernelspec
 
diff --git a/tensorflow/tools/docker/Dockerfile.devel-gpu b/tensorflow/tools/docker/Dockerfile.devel-gpu
index 4877f8c..2008ccd 100644
--- a/tensorflow/tools/docker/Dockerfile.devel-gpu
+++ b/tensorflow/tools/docker/Dockerfile.devel-gpu
@@ -12,8 +12,6 @@
         pkg-config \
         python \
         python-dev \
-        python-numpy \
-        python-pip \
         rsync \
         software-properties-common \
         swig \
@@ -32,6 +30,7 @@
         ipykernel \
         jupyter \
         matplotlib \
+        numpy \
         && \
     python -m ipykernel.kernelspec
 
@@ -86,8 +85,10 @@
 WORKDIR /tensorflow
 
 # Configure the build for our CUDA configuration.
+ENV CUDA_PATH /usr/local/cuda
 ENV CUDA_TOOLKIT_PATH /usr/local/cuda
-ENV CUDNN_INSTALL_PATH /usr/local/cuda
+ENV CUDNN_INSTALL_PATH /usr/lib/x86_64-linux-gnu
+ENV LD_LIBRARY_PATH /usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64
 ENV TF_NEED_CUDA 1
 
 RUN ./configure && \
@@ -97,9 +98,6 @@
 
 WORKDIR /root
 
-# Set up CUDA variables
-ENV CUDA_PATH /usr/local/cuda
-
 # TensorBoard
 EXPOSE 6006
 # IPython
diff --git a/tensorflow/tools/docker/Dockerfile.gpu b/tensorflow/tools/docker/Dockerfile.gpu
index 841f6124..428f450 100644
--- a/tensorflow/tools/docker/Dockerfile.gpu
+++ b/tensorflow/tools/docker/Dockerfile.gpu
@@ -12,10 +12,8 @@
         pkg-config \
         python \
         python-dev \
-        python-numpy \
-        python-pip \
-        python-scipy \
         rsync \
+        software-properties-common \
         unzip \
         && \
     apt-get clean && \
@@ -29,6 +27,8 @@
         ipykernel \
         jupyter \
         matplotlib \
+        numpy \
+        scipy \
         && \
     python -m ipykernel.kernelspec
 
diff --git a/tensorflow/tools/pip_package/build_pip_package.sh b/tensorflow/tools/pip_package/build_pip_package.sh
index 86d1fb1..40f7ae9 100755
--- a/tensorflow/tools/pip_package/build_pip_package.sh
+++ b/tensorflow/tools/pip_package/build_pip_package.sh
@@ -17,6 +17,14 @@
 
 set -e
 
+function cp_external() {
+  local src_dir=$1
+  local dest_dir=$2
+  for f in `find "$src_dir" -maxdepth 1 -mindepth 1 ! -name '*local_config_cuda*'`; do
+    cp -R "$f" "$dest_dir"
+  done
+}
+
 function main() {
   if [ $# -lt 1 ] ; then
     echo "No destination dir provided"
@@ -36,15 +44,23 @@
   if [ ! -d bazel-bin/tensorflow/tools/pip_package/build_pip_package.runfiles/org_tensorflow ]; then
     # Really old (0.2.1-) runfiles, without workspace name.
     cp -R \
-      bazel-bin/tensorflow/tools/pip_package/build_pip_package.runfiles/{tensorflow,external} \
+      bazel-bin/tensorflow/tools/pip_package/build_pip_package.runfiles/tensorflow \
       "${TMPDIR}"
+    mkdir "${TMPDIR}/external"
+    cp_external \
+      bazel-bin/tensorflow/tools/pip_package/build_pip_package.runfiles/external \
+      "${TMPDIR}/external"
     RUNFILES=bazel-bin/tensorflow/tools/pip_package/build_pip_package.runfiles
   else
     if [ -d bazel-bin/tensorflow/tools/pip_package/build_pip_package.runfiles/org_tensorflow/external ]; then
       # Old-style runfiles structure (--legacy_external_runfiles).
       cp -R \
-        bazel-bin/tensorflow/tools/pip_package/build_pip_package.runfiles/org_tensorflow/{tensorflow,external} \
+        bazel-bin/tensorflow/tools/pip_package/build_pip_package.runfiles/org_tensorflow/tensorflow \
         "${TMPDIR}"
+      mkdir "${TMPDIR}/external"
+      cp_external \
+        bazel-bin/tensorflow/tools/pip_package/build_pip_package.runfiles/org_tensorflow/external \
+        "${TMPDIR}/external"
     else
       # New-style runfiles structure (--nolegacy_external_runfiles).
       cp -R \
@@ -52,7 +68,7 @@
         "${TMPDIR}"
       mkdir "${TMPDIR}/external"
       # Note: this makes an extra copy of org_tensorflow.
-      cp -R \
+      cp_external \
         bazel-bin/tensorflow/tools/pip_package/build_pip_package.runfiles \
         "${TMPDIR}/external"
     fi
diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl
index 993d01a..728a37c 100644
--- a/tensorflow/workspace.bzl
+++ b/tensorflow/workspace.bzl
@@ -24,9 +24,9 @@
   )
 
   native.git_repository(
-    name = "re2",
+    name = "com_googlesource_code_re2",
     remote = "https://github.com/google/re2.git",
-    commit = "791beff",
+    commit = "fc6337a382bfd4f7c861abea08f872d3c85b31da",
   )
 
   native.git_repository(
@@ -50,7 +50,7 @@
   native.git_repository(
     name = "highwayhash",
     remote = "https://github.com/google/highwayhash.git",
-    commit = "be5edafc2e1a455768e260ccd68ae7317b6690ee",
+    commit = "4bce8fc6a9ca454d9d377dbc4c4d33488bbab78f",
     init_submodules = True,
   )
 
diff --git a/third_party/gpus/crosstool/CROSSTOOL.tpl b/third_party/gpus/crosstool/CROSSTOOL.tpl
index a367aa8..18aab5b 100644
--- a/third_party/gpus/crosstool/CROSSTOOL.tpl
+++ b/third_party/gpus/crosstool/CROSSTOOL.tpl
@@ -57,13 +57,7 @@
   linker_flag: "-lstdc++"
   linker_flag: "-B/usr/bin/"
 
-  # TODO(bazel-team): In theory, the path here ought to exactly match the path
-  # used by gcc. That works because bazel currently doesn't track files at
-  # absolute locations and has no remote execution, yet. However, this will need
-  # to be fixed, maybe with auto-detection?
-  cxx_builtin_include_directory: "/usr/lib/gcc/"
-  cxx_builtin_include_directory: "/usr/local/include"
-  cxx_builtin_include_directory: "/usr/include"
+%{gcc_host_compiler_includes}
   tool_path { name: "gcov" path: "/usr/bin/gcov" }
 
   # C(++) compiles invoke the compiler (as that is the one knowing where
diff --git a/third_party/gpus/cuda_configure.bzl b/third_party/gpus/cuda_configure.bzl
index 1e47bfa..ddd376c 100644
--- a/third_party/gpus/cuda_configure.bzl
+++ b/third_party/gpus/cuda_configure.bzl
@@ -86,9 +86,33 @@
   return [repository_ctx.path(_cxx_inc_convert(p))
           for p in inc_dirs.split("\n")]
 
+def auto_configure_fail(msg):
+  """Output failure message when auto configuration fails."""
+  red = "\033[0;31m"
+  no_color = "\033[0m"
+  fail("\n%sAuto-Configuration Error:%s %s\n" % (red, no_color, msg))
 # END cc_configure common functions (see TODO above).
 
 
+def _gcc_host_compiler_includes(repository_ctx, cc):
+  """Generates the cxx_builtin_include_directory entries for gcc inc dirs.
+
+  Args:
+    repository_ctx: The repository context.
+    cc: The path to the gcc host compiler.
+
+  Returns:
+    A string containing the cxx_builtin_include_directory for each of the gcc
+    host compiler include directories, which can be added to the CROSSTOOL
+    file.
+  """
+  inc_dirs = get_cxx_inc_directories(repository_ctx, cc)
+  inc_entries = []
+  for inc_dir in inc_dirs:
+    inc_entries.append("  cxx_builtin_include_directory: \"%s\"" % inc_dir)
+  return "\n".join(inc_entries)
+
+
 def _enable_cuda(repository_ctx):
   if "TF_NEED_CUDA" in repository_ctx.os.environ:
     enable_cuda = repository_ctx.os.environ["TF_NEED_CUDA"].strip()
@@ -102,7 +126,7 @@
   if _CUDA_TOOLKIT_PATH in repository_ctx.os.environ:
     cuda_toolkit_path = repository_ctx.os.environ[_CUDA_TOOLKIT_PATH].strip()
   if not repository_ctx.path(cuda_toolkit_path).exists:
-    fail("Cannot find cuda toolkit path.")
+    auto_configure_fail("Cannot find cuda toolkit path.")
   return cuda_toolkit_path
 
 
@@ -112,7 +136,7 @@
   if _CUDNN_INSTALL_PATH in repository_ctx.os.environ:
     cudnn_install_path = repository_ctx.os.environ[_CUDNN_INSTALL_PATH].strip()
   if not repository_ctx.path(cudnn_install_path).exists:
-    fail("Cannot find cudnn install path.")
+    auto_configure_fail("Cannot find cudnn install path.")
   return cudnn_install_path
 
 
@@ -144,7 +168,7 @@
     #     if re.match("[0-9]+.[0-9]+", capability) == None:
     parts = capability.split(".")
     if len(parts) != 2 or not parts[0].isdigit() or not parts[1].isdigit():
-      fail("Invalid compute capability: %s" % capability)
+      auto_configure_fail("Invalid compute capability: %s" % capability)
   return capabilities
 
 
@@ -186,7 +210,7 @@
         cuda_fft_lib = "lib/libcufft%s.dylib" % cuda_ext,
         cuda_cupti_lib = "extras/CUPTI/lib/libcupti%s.dylib" % cuda_ext)
   else:
-    fail("Not supported CPU value %s" % cpu_value)
+    auto_configure_fail("Not supported CPU value %s" % cpu_value)
 
 
 def _check_lib(repository_ctx, cuda_toolkit_path, cuda_lib):
@@ -199,7 +223,7 @@
   """
   lib_path = cuda_toolkit_path + "/" + cuda_lib
   if not repository_ctx.path(lib_path).exists:
-    fail("Cannot find %s" % lib_path)
+    auto_configure_fail("Cannot find %s" % lib_path)
 
 
 def _check_dir(repository_ctx, directory):
@@ -210,7 +234,7 @@
     directory: The directory to check the existence of.
   """
   if not repository_ctx.path(directory).exists:
-    fail("Cannot find dir: %s" % directory)
+    auto_configure_fail("Cannot find dir: %s" % directory)
 
 
 def _find_cudnn_header_dir(repository_ctx, cudnn_install_basedir):
@@ -230,7 +254,7 @@
     return cudnn_install_basedir + "/include"
   if repository_ctx.path("/usr/include/cudnn.h").exists:
     return "/usr/include"
-  fail("Cannot find cudnn.h under %s" % cudnn_install_basedir)
+  auto_configure_fail("Cannot find cudnn.h under %s" % cudnn_install_basedir)
 
 
 def _find_cudnn_lib_path(repository_ctx, cudnn_install_basedir, symlink_files):
@@ -252,7 +276,7 @@
   if repository_ctx.path(alt_lib_dir).exists:
     return alt_lib_dir
 
-  fail("Cannot find %s or %s under %s" %
+  auto_configure_fail("Cannot find %s or %s under %s" %
        (symlink_files.cuda_dnn_lib, symlink_files.cuda_dnn_lib_alt,
         cudnn_install_basedir))
 
@@ -380,15 +404,18 @@
 
   # Set up crosstool/
   _file(repository_ctx, "crosstool:BUILD")
+  cc = find_cc(repository_ctx)
+  gcc_host_compiler_includes = _gcc_host_compiler_includes(repository_ctx, cc)
   _tpl(repository_ctx, "crosstool:CROSSTOOL",
        {
            "%{cuda_version}": ("-%s" % cuda_version) if cuda_version else "",
+           "%{gcc_host_compiler_includes}": gcc_host_compiler_includes,
        })
   _tpl(repository_ctx,
        "crosstool:clang/bin/crosstool_wrapper_driver_is_not_gcc",
        {
-           "%{cpu_compiler}": str(find_cc(repository_ctx)),
-           "%{gcc_host_compiler_path}": str(find_cc(repository_ctx)),
+           "%{cpu_compiler}": str(cc),
+           "%{gcc_host_compiler_path}": str(cc),
            "%{cuda_compute_capabilities}": ", ".join(
                ["\"%s\"" % c for c in compute_capabilities]),
        })
diff --git a/util/python/python_config.sh b/util/python/python_config.sh
index 00619c6..d7090a4 100755
--- a/util/python/python_config.sh
+++ b/util/python/python_config.sh
@@ -60,10 +60,10 @@
 if os.getenv('PYTHONPATH') is not None:
   python_paths = os.getenv('PYTHONPATH').split(':')
 try:
-  library_paths =  site.getsitepackages()
+  library_paths = site.getsitepackages()
 except AttributeError:
  from distutils.sysconfig import get_python_lib
- library_paths = get_python_lib()
+ library_paths = [get_python_lib()]
 all_paths = set(python_paths + library_paths)
 
 paths = []